Changeset 1232


Ignore:
Timestamp:
Jul 28, 2011, 3:01:53 PM (8 years ago)
Author:
vla24
Message:

Updated symbol table implementation.

Location:
proto
Files:
1 deleted
13 edited

Legend:

Unmodified
Added
Removed
  • proto/SymbolTable/Makefile

    r1231 r1232  
    33PABLO_SRCFILE=../parabix2/parabix2_pablo.py
    44PABLO_COMPILER=../Compiler/pablomain.py
     5PABLO_SYMTAB_HASH=parabix2_symtab_hash.py
    56PABLO_SYMTAB_PBS=parabix2_symtab_pbs.py
    67PABLO_SYMTAB_PBS_LOG=parabix2_symtab_pbs_log.py
     
    2728        python $(PABLO_COMPILER) $(PABLO_SRCFILE) -t $(HASH_SYMBOLTABLE_TEMPLATE) -o $(OUTFILE)
    2829
    29 symtab_id:      $(PABLO_SRCFILE) # Parallel bitstream based group sorting
    30         python $(PABLO_COMPILER) $(PABLO_FLAGS) $(PABLO_SRCFILE) -t $(SYMBOLTABLE_IDENTITY_TEMPLATE) -o $(OUTFILE)
     30symtab_id:      $(PABLO_SYMTAB_HASH) # Parallel bitstream based group sorting
     31        python $(PABLO_COMPILER) $(PABLO_FLAGS) $(PABLO_SYMTAB_HASH) -t $(SYMBOLTABLE_IDENTITY_TEMPLATE) -o $(OUTFILE)
    3132
    3233symtab_pbgs_id: $(PABLO_SYMTAB_PBS) # Parallel bitstream based group sorting
  • proto/SymbolTable/README_SymbolTable

    r1231 r1232  
    77---------------------------------
    88
    9 LENGTH SORTING
    10 
    11 This is Ken's implementation. We are using the mask-equal-and/or method.
    12 To compile      : make symtab_ls
    13 Data structure  : lib/symtab/ls_symbol_table*.*
    14 
    15 ---------------------------------
    16 
    179HASH TABLE (Jestress)
    1810
     
    2012To compile      : make symtab_hash
    2113Data structure  : lib/symtab/hash_symbol_table.*
     14
     15---------------------------------
     16
     17LENGTH SORTING
     18
     191. MASK-EQUAL-AND/OR
     20This is Ken's implementation. We are using the mask-equal-and/or method.
     21To compile      : make symtab_ls
     22Data structure  : lib/symtab/ls_symbol_table*.*
     23
     242. USE_IDENTITY_SORT    f(L) = L
     25Input:
     26- a bitstream that marks start positions
     27- a bitstream that marks end positions
     28Sequentially scan symbols and group the symbols based on their length.
     29To compile      : make symtab_id
     30Data structure  : lib/symtab/pbgs_identity_symbol_table.h
    2231
    2332---------------------------------
  • proto/SymbolTable/automate-build.sh

    r1231 r1232  
    22cd src
    33make all
    4 ./xmlwf ../test/test5.xml
     4./xmlwf ../test/test.xml
    55cd ..
    66
  • proto/SymbolTable/parabix2_symtab_pbs.py

    r1228 r1232  
    4545  xF4_scope = 0
    4646  xEF_scope = 0
    47  
    48   FFFE_FFFF = 0
    49   error = 0
    5047
    5148class Lex ():
     
    5653        CRLF = 0
    5754        RefStart = 0
    58         Semicolon = 0 
     55        Semicolon = 0
    5956        Colon = 0
    6057        LAngle = 0
     
    7774        Hex = 0
    7875        WS = 0
    79         error = 0
    8076
    8177class Scope1 ():
     
    9793        PI_ends = 0
    9894        CtCDPI_mask = 0
    99         error = 0
    10095
    10196class Ref_Callouts():
     
    106101        HexRef_starts = 0
    107102        HexRef_ends = 0
    108         error = 0
    109103
    110104class Hash_data():
     
    138132        EmptyTag_marks = 0
    139133        EndTag_marks = 0
    140         LAngleFollow = 0
    141         error = 0
    142 
    143 class Basis_bits():     
     134
     135class Basis_bits():
    144136        bit_0 = 0
    145137        bit_1 = 0
     
    150142        bit_6 = 0
    151143        bit_7 = 0
    152        
     144
    153145class Check_streams():
    154146        misc_mask = 0
    155147        non_ascii_name_starts = 0
    156148        non_ascii_names = 0
    157         tag_marks = 0
    158         name_follows = 0
    159         att_refs = 0
    160         error_mask = 0
     149        tag_marks = 0
     150        name_follows = 0
     151        att_refs = 0
    161152
    162153class Xml_names():
    163154        namespace_error = 0
    164155
    165 def Classify_bytes_Validate_utf8(basis_bits, lex, u8): 
     156def Classify_bytes_Validate_utf8(basis_bits, lex, u8):
    166157        temp1 = (basis_bits.bit_0 | basis_bits.bit_1);
    167158        temp2 = (basis_bits.bit_2 &~ basis_bits.bit_3);
     
    256247        temp65 = (temp64 & temp60);
    257248        lex.Hex = (temp62 | temp65);
    258         lex.error = x00_x1F &~ lex.WS
    259        
     249        lex_error = x00_x1F &~ lex.WS
     250        if lex_error & EOF_mask:
     251                error_tracker.NoteError("Error: illegal character", lex_error)
     252
     253
    260254        ### Validate_utf8(basis_bits, u8):
    261255        u8.unibyte = (~basis_bits.bit_0);
    262256        u8.suffix = 0
    263         u8.error = 0
    264         u8.FFFE_FFFF = 0
     257        u8_error = 0
     258        u8_FFFE_FFFF = 0
    265259        u8anyscope = 0 #local
    266260        if basis_bits.bit_0:
     
    277271                temp71 = (u8.prefix4 & temp70);
    278272                u8.badprefix = (temp68 | temp71);
    279                 u8.error = u8.badprefix
     273                u8_error = u8.badprefix
    280274                u8.scope22 = bitutil.Advance(u8.prefix2)
    281275                u8anyscope = u8.scope22
     
    307301                        u8lastscope = u8.scope22 | u8.scope33 | u8.scope44
    308302                        u8anyscope = u8lastscope | u8.scope32 | u8.scope42 | u8.scope43
    309                
     303
    310304                        u8error1 = u8.xE0_scope & u8.x80_x9F
    311305                        u8error2 = u8.xED_scope & u8.xA0_xBF
    312306                        u8error3 = u8.xF0_scope & u8.x80_x8F
    313307                        u8error4 = u8.xF4_scope & u8.x90_xBF
    314        
    315                         u8.error |= u8error1 | u8error2 | u8error3 | u8error4
     308
     309                        u8_error |= u8error1 | u8error2 | u8error3 | u8error4
    316310
    317311                        EF_BF_pending = bitutil.Advance(u8.xEF_scope & u8.xBF)
    318312
    319                         u8.FFFE_FFFF = (EF_BF_pending & (u8.xBE | u8.xBF))
    320         u8mismatch = u8anyscope ^ u8.suffix
    321         u8.error |= u8mismatch
    322        
     313                        u8_FFFE_FFFF = (EF_BF_pending & (u8.xBE | u8.xBF))
     314                u8mismatch = u8anyscope ^ u8.suffix
     315                u8_error |= u8mismatch | u8_FFFE_FFFF
     316                if u8_error:
     317                        error_tracker.NoteError("UTF-8 error found", (u8_error))
     318
     319
    323320def Add_scope_streams(lex, scope1):
    324321        #scope1.LAngle = bitutil.Advance(lex.LAngle)
     
    328325        w = lex.Hyphen | lex.QMark
    329326        v1 = bitutil.Advance(v)
    330         w1 = bitutil.Advance(w)
     327        w1 = bitutil.Advance(w)
    331328        scope1.LAngle = v1 &~ w1
    332329        scope1.Hyphen = v1 & w1
    333330        scope1.QMark = w1 &~ v1
     331        scope1.RefStart = 0 # default
    334332
    335333def Parse_CtCDPI(ctCDPI_Callouts, lex, scope1, check_streams):
     
    344342        ctCDPI_Callouts.PI_ends = 0
    345343        ctCDPI_Callouts.CtCDPI_mask = 0
    346         ctCDPI_Callouts.error = 0
     344        ctCDPI_error = 0
    347345        CtCDPI_starts = 0
    348346        Ct_errors = 0
     
    367365                CD_Ct_Cursor = bitutil.Advance(CtCDPI_Cursor & ~PI_Cursor)
    368366                CD_Cursor = CD_Ct_Cursor & lex.LBracket
    369                 Ct_Cursor = CD_Ct_Cursor & lex.Hyphen 
     367                Ct_Cursor = CD_Ct_Cursor & lex.Hyphen
    370368                ctCDPI_Callouts.PI_starts |= PI_Cursor
    371369                ctCDPI_Callouts.CD_starts |= CD_Cursor
    372370                ctCDPI_Callouts.Ct_starts |= Ct_Cursor
    373                 Ct_Cursor = bitutil.Advance(Ct_Cursor) 
    374                 Ct_errors |= Ct_Cursor & ~ lex.Hyphen 
    375                 # Advance twice past <!--, so that we don't treat <!--- 
    376                 # as being a terminated comment.
     371                Ct_Cursor = bitutil.Advance(Ct_Cursor)
     372                Ct_errors |= Ct_Cursor & ~ lex.Hyphen
     373                # Advance twice past <!--, so that we don't treat <!---
     374                # as being a terminated comment.
    377375                Ct_Cursor = bitutil.Advance(bitutil.Advance(Ct_Cursor))
    378376                PI_Cursor = bitutil.Advance(PI_Cursor)
     
    388386                CtCDPI_Cursor = PI_Cursor | CD_Cursor | Ct_Cursor
    389387                CtCDPI_Cursor = bitutil.ScanTo(CtCDPI_Cursor, CtCDPI_start)
    390        
    391                 ctCDPI_Callouts.CtCDPI_mask = bitutil.Advance(ctCDPI_Callouts.CD_ends | ctCDPI_Callouts.Ct_ends | ctCDPI_Callouts.PI_ends) - CtCDPI_starts             
     388
     389                ctCDPI_Callouts.CtCDPI_mask = bitutil.Advance(ctCDPI_Callouts.CD_ends | ctCDPI_Callouts.Ct_ends | ctCDPI_Callouts.PI_ends) - CtCDPI_starts
    392390                #ctCDPI_Callouts.error = Ct_ends & ~lex.RAngle | Ct_starts & ~ lex.Hyphen
    393                 ctCDPI_Callouts.error = Ct_errors | ctCDPI_Callouts.Ct_ends & ~lex.RAngle
    394                 ctCDPI_Callouts.error |= bitutil.Advance(ctCDPI_Callouts.PI_name_ends & ~ lex.WS) & ~ PI_end
    395                 ctCDPI_Callouts.error |= ctCDPI_Callouts.PI_name_starts & ctCDPI_Callouts.PI_name_ends
     391                ctCDPI_error = Ct_errors | ctCDPI_Callouts.Ct_ends & ~lex.RAngle
     392                ctCDPI_error |= bitutil.Advance(ctCDPI_Callouts.PI_name_ends & ~ lex.WS) & ~ PI_end
     393                ctCDPI_error |= ctCDPI_Callouts.PI_name_starts & ctCDPI_Callouts.PI_name_ends
    396394                # If any of the Comment, CDATA or PI markups are unterminated, it is an error.
    397                 ctCDPI_Callouts.error |= ctCDPI_Callouts.CtCDPI_mask &~ EOF_mask
    398                
     395                ctCDPI_error |= ctCDPI_Callouts.CtCDPI_mask &~ EOF_mask
     396
     397        if ctCDPI_error:
     398                error_tracker.NoteError("Error in comment, CDATA or processing instruction syntax", ctCDPI_error)
     399
    399400        check_streams.misc_mask = (lex.WS | lex.LAngle | (bitutil.Advance(ctCDPI_Callouts.Ct_ends | ctCDPI_Callouts.PI_ends) - (ctCDPI_Callouts.Ct_starts | ctCDPI_Callouts.PI_starts)) | CtCDPI_starts) & EOF_mask
    400401
    401402def Parse_tags(lex, scope1, ctCDPI_Callouts, tag_Callouts):
    402403
    403        
     404
    404405        # Delimiters for scans.
    405406        DQuoteDelim = lex.DQuote | lex.LAngle
    406407        SQuoteDelim = lex.SQuote | lex.LAngle
    407408        AttListDelim = lex.Slash | lex.RAngle
    408        
     409
    409410        # Start the parallel parsing by inspecting the character
    410411        # after the opening "<" of a tag.
    411         tag_Callouts.LAngleFollow = scope1.LAngle &~ ctCDPI_Callouts.CtCDPI_mask
    412         tag_Callouts.ElemName_starts = tag_Callouts.LAngleFollow & ~lex.Slash
    413         tag_Callouts.EndTag_marks = tag_Callouts.LAngleFollow & lex.Slash
    414        
     412        LAngleFollow = scope1.LAngle &~ ctCDPI_Callouts.CtCDPI_mask
     413        tag_Callouts.ElemName_starts = LAngleFollow & ~lex.Slash
     414        tag_Callouts.EndTag_marks = LAngleFollow & lex.Slash
     415
    415416        # Start Tag/Empty Element Tag Parsing
    416417
     
    420421        # Mark any occurrences of null names as errors.
    421422        ParseError = tag_Callouts.ElemName_starts & tag_Callouts.ElemName_ends
    422        
     423
    423424        # Initialize the accumulators for attribute name and value positions.
    424         tag_Callouts.AttName_starts = 0 
     425        tag_Callouts.AttName_starts = 0
    425426        tag_Callouts.AttName_ends = 0
    426427        EqToCheck = 0
     
    446447                tag_Callouts.AttName_ends |= AttNameFollow
    447448                # Scan through WS to the expected '=' delimiter.
    448                 EqExpected = bitutil.ScanThru(AttNameFollow, lex.WS)
     449                # EqExpected = bitutil.ScanThru(AttNameFollow, lex.WS)
     450                # But use if test to optimize.
     451                if AttNameFollow & lex.WS:
     452                        EqExpected = bitutil.ScanThru(AttNameFollow, lex.WS)
     453                else: EqExpected = AttNameFollow
    449454                EqToCheck |= EqExpected
    450455                AttValPos = bitutil.ScanThru(EqExpected, EqExpected | lex.WS)
     
    458463                AttValFollow = bitutil.Advance(AttValEnd)
    459464                tag_Callouts.AttVal_ends |= AttValFollow
     465                #  AfterWS = bitutil.ScanThru(AttValFollow, lex.WS)
     466                if AttValFollow & lex.WS:
     467                        AfterWS = bitutil.ScanThru(AttValFollow, lex.WS)
     468                else: AfterWS = AttValFollow
    460469                AfterWS = bitutil.ScanThru(AttValFollow, lex.WS)
    461470                AttListEnd |= AfterWS & AttListDelim
     
    466475        # Mark any "/" characters found as the ends of empty element tags.
    467476        tag_Callouts.EmptyTag_marks = bitutil.Advance(AttListEnd & lex.Slash)
    468        
     477
    469478        # Check for errors.
    470479        ParseError |= tag_Callouts.AttVal_ends & tag_Callouts.AttName_starts # No intervening WS.
     
    476485
    477486        # End Tag Parsing
    478         EndTagEnds = bitutil.ScanThru(bitutil.ScanThru(tag_Callouts.EndTag_marks, tag_Callouts.EndTag_marks | lex.NameScan), lex.WS)
     487
     488        EndTagEnds = bitutil.ScanThru(tag_Callouts.EndTag_marks, tag_Callouts.EndTag_marks | lex.NameScan)
     489        if EndTagEnds & lex.WS:
     490                EndTagEnds = bitutil.ScanThru(EndTagEnds, lex.WS)
    479491        ParseError |= EndTagEnds & ~lex.RAngle
    480         tag_Callouts.error = ParseError
    481                
     492        if ParseError:
     493                error_tracker.NoteError("Tag parsing error found", (ParseError))
     494
     495
    482496        # Attribute value spans
    483497        tag_Callouts.AttVal_spans = tag_Callouts.AttVal_ends - tag_Callouts.AttVal_starts
    484                        
     498
    485499def Parse_refs(lex, scope1, ctCDPI_Callouts, ref_Callouts):
    486500        ref_Callouts.GenRef_starts = 0
     
    490504        ref_Callouts.HexRef_starts = 0
    491505        ref_Callouts.HexRef_ends = 0
    492         ref_Callouts.error = 0
     506        ref_error = 0
    493507
    494508        Ref1 = lex.RefStart &~ ctCDPI_Callouts.CtCDPI_mask
     
    501515                HexRef3 = NumRef3 & lex.x
    502516                ref_Callouts.DecRef_starts = NumRef3 &~ lex.x
    503                 ref_Callouts.HexRef_starts = bitutil.Advance(HexRef3) 
     517                ref_Callouts.HexRef_starts = bitutil.Advance(HexRef3)
    504518                ref_Callouts.GenRef_ends = bitutil.ScanThru(ref_Callouts.GenRef_starts, lex.NameScan)
    505519                ref_Callouts.DecRef_ends = bitutil.ScanThru(ref_Callouts.DecRef_starts, lex.Digit)
     
    512526                ref_ends = ref_Callouts.GenRef_ends | ref_Callouts.DecRef_ends | ref_Callouts.HexRef_ends
    513527                ref_error3 = ref_ends &~ lex.Semicolon
    514                 ref_Callouts.error = ref_error1 | ref_error2 | ref_error3
     528                ref_error = ref_error1 | ref_error2 | ref_error3
     529                if ref_error:
     530                        error_tracker.NoteError("Reference error found", (ref_error))
     531
     532
    515533
    516534def Validate_xml_names(ctCDPI_Callouts, ref_Callouts, tag_Callouts, lex, u8, xml_names, check_streams):
     
    530548        colon2_err = bitutil.ScanThru(local_part_start, lex.NameScan &~ lex.Colon) & lex.Colon
    531549        ncname_err = ncname_stream & lex.Colon
    532         xml_names.namespace_error = void_prefix_err | local_part_err | colon2_err | ncname_err
    533                        
     550        namespace_error = void_prefix_err | local_part_err | colon2_err | ncname_err
     551        if namespace_error:
     552                error_tracker.NoteError("error found", namespace_error)
     553
     554
    534555        check_streams.non_ascii_name_starts = name_start &~lex.ASCII_name_start
    535556        check_streams.non_ascii_names = (name_stream &~ name_start) & ~lex.ASCII_name_char & ~u8.suffix
    536    
     557
    537558def Do_check_streams(ctCDPI_Callouts, tag_Callouts, lex, u8, scope1, ref_Callouts, xml_names, check_streams):
    538559    # Ensure that no occurrence of ]]> occurs outside of markup.
    539560    CD_end_error = ctCDPI_Callouts.CD_end & ~(ctCDPI_Callouts.CtCDPI_mask | tag_Callouts.AttVal_spans)
    540            
     561
    541562    # Consolidate and check for errors
    542     check_streams.error_mask = lex.error & EOF_mask | u8.error | u8.FFFE_FFFF | ctCDPI_Callouts.error | tag_Callouts.error | CD_end_error | ref_Callouts.error | xml_names.namespace_error
    543 
    544     check_streams.tag_marks = tag_Callouts.EmptyTag_marks | tag_Callouts.LAngleFollow | tag_Callouts.AttName_starts
     563    if CD_end_error:
     564                error_tracker.NoteError("Error: ]]> in text", CD_end_error)
     565
     566
     567    check_streams.tag_marks = tag_Callouts.EmptyTag_marks | tag_Callouts.ElemName_starts | tag_Callouts.EndTag_marks | tag_Callouts.AttName_starts
    545568    check_streams.name_follows = tag_Callouts.ElemName_ends | tag_Callouts.AttName_ends
    546569    check_streams.att_refs = tag_Callouts.AttVal_spans & scope1.RefStart
     570
    547571
    548572def Form_Length_Group_Bitstreams(tag_Callouts):
     
    640664    #hash_data.Hash_value = basis_bits.bit_3 ^ basis_bits.bit_5 ^ basis_bits.bit_7
    641665
    642 #def main(basis_bits, lex, u8, scope1, ctCDPI_Callouts, masks, check_streams, tag_Callouts, ref_Callouts, xml_names):
    643666def Main(basis_bits, lex, u8, xml_char, scope1, ctCDPI_Callouts, ref_Callouts, tag_Callouts, masks, xml_names, check_streams, hash_data):
    644667       
  • proto/SymbolTable/parabix2_symtab_pbs_adv.py

    r1228 r1232  
    4545  xF4_scope = 0
    4646  xEF_scope = 0
    47  
    48   FFFE_FFFF = 0
    49   error = 0
    5047
    5148class Lex ():
     
    7774        Hex = 0
    7875        WS = 0
    79         error = 0
    8076
    8177class Scope1 ():
     
    9793        PI_ends = 0
    9894        CtCDPI_mask = 0
    99         error = 0
    10095
    10196class Ref_Callouts():
     
    106101        HexRef_starts = 0
    107102        HexRef_ends = 0
    108         error = 0
    109103
    110104class Hash_data():
     
    138132        EmptyTag_marks = 0
    139133        EndTag_marks = 0
    140         LAngleFollow = 0
    141         error = 0
    142 
    143 class Basis_bits():     
     134
     135class Basis_bits():
    144136        bit_0 = 0
    145137        bit_1 = 0
     
    150142        bit_6 = 0
    151143        bit_7 = 0
    152        
     144
    153145class Check_streams():
    154146        misc_mask = 0
    155147        non_ascii_name_starts = 0
    156148        non_ascii_names = 0
    157         tag_marks = 0
    158         name_follows = 0
    159         att_refs = 0
    160         error_mask = 0
     149        tag_marks = 0
     150        name_follows = 0
     151        att_refs = 0
    161152
    162153class Xml_names():
    163154        namespace_error = 0
    164155
    165 def Classify_bytes_Validate_utf8(basis_bits, lex, u8): 
     156def Classify_bytes_Validate_utf8(basis_bits, lex, u8):
    166157        temp1 = (basis_bits.bit_0 | basis_bits.bit_1);
    167158        temp2 = (basis_bits.bit_2 &~ basis_bits.bit_3);
     
    256247        temp65 = (temp64 & temp60);
    257248        lex.Hex = (temp62 | temp65);
    258         lex.error = x00_x1F &~ lex.WS
    259        
     249        lex_error = x00_x1F &~ lex.WS
     250        if lex_error & EOF_mask:
     251                error_tracker.NoteError("Error: illegal character", lex_error)
     252
     253
    260254        ### Validate_utf8(basis_bits, u8):
    261255        u8.unibyte = (~basis_bits.bit_0);
    262256        u8.suffix = 0
    263         u8.error = 0
    264         u8.FFFE_FFFF = 0
     257        u8_error = 0
     258        u8_FFFE_FFFF = 0
    265259        u8anyscope = 0 #local
    266260        if basis_bits.bit_0:
     
    277271                temp71 = (u8.prefix4 & temp70);
    278272                u8.badprefix = (temp68 | temp71);
    279                 u8.error = u8.badprefix
     273                u8_error = u8.badprefix
    280274                u8.scope22 = bitutil.Advance(u8.prefix2)
    281275                u8anyscope = u8.scope22
     
    307301                        u8lastscope = u8.scope22 | u8.scope33 | u8.scope44
    308302                        u8anyscope = u8lastscope | u8.scope32 | u8.scope42 | u8.scope43
    309                
     303
    310304                        u8error1 = u8.xE0_scope & u8.x80_x9F
    311305                        u8error2 = u8.xED_scope & u8.xA0_xBF
    312306                        u8error3 = u8.xF0_scope & u8.x80_x8F
    313307                        u8error4 = u8.xF4_scope & u8.x90_xBF
    314        
    315                         u8.error |= u8error1 | u8error2 | u8error3 | u8error4
     308
     309                        u8_error |= u8error1 | u8error2 | u8error3 | u8error4
    316310
    317311                        EF_BF_pending = bitutil.Advance(u8.xEF_scope & u8.xBF)
    318312
    319                         u8.FFFE_FFFF = (EF_BF_pending & (u8.xBE | u8.xBF))
    320         u8mismatch = u8anyscope ^ u8.suffix
    321         u8.error |= u8mismatch
    322        
     313                        u8_FFFE_FFFF = (EF_BF_pending & (u8.xBE | u8.xBF))
     314                u8mismatch = u8anyscope ^ u8.suffix
     315                u8_error |= u8mismatch | u8_FFFE_FFFF
     316                if u8_error:
     317                        error_tracker.NoteError("UTF-8 error found", (u8_error))
     318
     319
    323320def Add_scope_streams(lex, scope1):
    324321        #scope1.LAngle = bitutil.Advance(lex.LAngle)
     
    328325        w = lex.Hyphen | lex.QMark
    329326        v1 = bitutil.Advance(v)
    330         w1 = bitutil.Advance(w)
     327        w1 = bitutil.Advance(w)
    331328        scope1.LAngle = v1 &~ w1
    332329        scope1.Hyphen = v1 & w1
    333330        scope1.QMark = w1 &~ v1
     331        scope1.RefStart = 0 # default
    334332
    335333def Parse_CtCDPI(ctCDPI_Callouts, lex, scope1, check_streams):
     
    344342        ctCDPI_Callouts.PI_ends = 0
    345343        ctCDPI_Callouts.CtCDPI_mask = 0
    346         ctCDPI_Callouts.error = 0
     344        ctCDPI_error = 0
    347345        CtCDPI_starts = 0
    348346        Ct_errors = 0
     
    367365                CD_Ct_Cursor = bitutil.Advance(CtCDPI_Cursor & ~PI_Cursor)
    368366                CD_Cursor = CD_Ct_Cursor & lex.LBracket
    369                 Ct_Cursor = CD_Ct_Cursor & lex.Hyphen 
     367                Ct_Cursor = CD_Ct_Cursor & lex.Hyphen
    370368                ctCDPI_Callouts.PI_starts |= PI_Cursor
    371369                ctCDPI_Callouts.CD_starts |= CD_Cursor
    372370                ctCDPI_Callouts.Ct_starts |= Ct_Cursor
    373                 Ct_Cursor = bitutil.Advance(Ct_Cursor) 
    374                 Ct_errors |= Ct_Cursor & ~ lex.Hyphen 
    375                 # Advance twice past <!--, so that we don't treat <!--- 
    376                 # as being a terminated comment.
     371                Ct_Cursor = bitutil.Advance(Ct_Cursor)
     372                Ct_errors |= Ct_Cursor & ~ lex.Hyphen
     373                # Advance twice past <!--, so that we don't treat <!---
     374                # as being a terminated comment.
    377375                Ct_Cursor = bitutil.Advance(bitutil.Advance(Ct_Cursor))
    378376                PI_Cursor = bitutil.Advance(PI_Cursor)
     
    388386                CtCDPI_Cursor = PI_Cursor | CD_Cursor | Ct_Cursor
    389387                CtCDPI_Cursor = bitutil.ScanTo(CtCDPI_Cursor, CtCDPI_start)
    390        
    391                 ctCDPI_Callouts.CtCDPI_mask = bitutil.Advance(ctCDPI_Callouts.CD_ends | ctCDPI_Callouts.Ct_ends | ctCDPI_Callouts.PI_ends) - CtCDPI_starts             
     388
     389                ctCDPI_Callouts.CtCDPI_mask = bitutil.Advance(ctCDPI_Callouts.CD_ends | ctCDPI_Callouts.Ct_ends | ctCDPI_Callouts.PI_ends) - CtCDPI_starts
    392390                #ctCDPI_Callouts.error = Ct_ends & ~lex.RAngle | Ct_starts & ~ lex.Hyphen
    393                 ctCDPI_Callouts.error = Ct_errors | ctCDPI_Callouts.Ct_ends & ~lex.RAngle
    394                 ctCDPI_Callouts.error |= bitutil.Advance(ctCDPI_Callouts.PI_name_ends & ~ lex.WS) & ~ PI_end
    395                 ctCDPI_Callouts.error |= ctCDPI_Callouts.PI_name_starts & ctCDPI_Callouts.PI_name_ends
     391                ctCDPI_error = Ct_errors | ctCDPI_Callouts.Ct_ends & ~lex.RAngle
     392                ctCDPI_error |= bitutil.Advance(ctCDPI_Callouts.PI_name_ends & ~ lex.WS) & ~ PI_end
     393                ctCDPI_error |= ctCDPI_Callouts.PI_name_starts & ctCDPI_Callouts.PI_name_ends
    396394                # If any of the Comment, CDATA or PI markups are unterminated, it is an error.
    397                 ctCDPI_Callouts.error |= ctCDPI_Callouts.CtCDPI_mask &~ EOF_mask
    398                
     395                ctCDPI_error |= ctCDPI_Callouts.CtCDPI_mask &~ EOF_mask
     396
     397        if ctCDPI_error:
     398                error_tracker.NoteError("Error in comment, CDATA or processing instruction syntax", ctCDPI_error)
     399
    399400        check_streams.misc_mask = (lex.WS | lex.LAngle | (bitutil.Advance(ctCDPI_Callouts.Ct_ends | ctCDPI_Callouts.PI_ends) - (ctCDPI_Callouts.Ct_starts | ctCDPI_Callouts.PI_starts)) | CtCDPI_starts) & EOF_mask
    400401
    401402def Parse_tags(lex, scope1, ctCDPI_Callouts, tag_Callouts):
    402403
    403        
     404
    404405        # Delimiters for scans.
    405406        DQuoteDelim = lex.DQuote | lex.LAngle
    406407        SQuoteDelim = lex.SQuote | lex.LAngle
    407408        AttListDelim = lex.Slash | lex.RAngle
    408        
     409
    409410        # Start the parallel parsing by inspecting the character
    410411        # after the opening "<" of a tag.
    411         tag_Callouts.LAngleFollow = scope1.LAngle &~ ctCDPI_Callouts.CtCDPI_mask
    412         tag_Callouts.ElemName_starts = tag_Callouts.LAngleFollow & ~lex.Slash
    413         tag_Callouts.EndTag_marks = tag_Callouts.LAngleFollow & lex.Slash
    414        
     412        LAngleFollow = scope1.LAngle &~ ctCDPI_Callouts.CtCDPI_mask
     413        tag_Callouts.ElemName_starts = LAngleFollow & ~lex.Slash
     414        tag_Callouts.EndTag_marks = LAngleFollow & lex.Slash
     415
    415416        # Start Tag/Empty Element Tag Parsing
    416417
     
    420421        # Mark any occurrences of null names as errors.
    421422        ParseError = tag_Callouts.ElemName_starts & tag_Callouts.ElemName_ends
    422        
     423
    423424        # Initialize the accumulators for attribute name and value positions.
    424         tag_Callouts.AttName_starts = 0 
     425        tag_Callouts.AttName_starts = 0
    425426        tag_Callouts.AttName_ends = 0
    426427        EqToCheck = 0
     
    446447                tag_Callouts.AttName_ends |= AttNameFollow
    447448                # Scan through WS to the expected '=' delimiter.
    448                 EqExpected = bitutil.ScanThru(AttNameFollow, lex.WS)
     449                # EqExpected = bitutil.ScanThru(AttNameFollow, lex.WS)
     450                # But use if test to optimize.
     451                if AttNameFollow & lex.WS:
     452                        EqExpected = bitutil.ScanThru(AttNameFollow, lex.WS)
     453                else: EqExpected = AttNameFollow
    449454                EqToCheck |= EqExpected
    450455                AttValPos = bitutil.ScanThru(EqExpected, EqExpected | lex.WS)
     
    458463                AttValFollow = bitutil.Advance(AttValEnd)
    459464                tag_Callouts.AttVal_ends |= AttValFollow
     465                #  AfterWS = bitutil.ScanThru(AttValFollow, lex.WS)
     466                if AttValFollow & lex.WS:
     467                        AfterWS = bitutil.ScanThru(AttValFollow, lex.WS)
     468                else: AfterWS = AttValFollow
    460469                AfterWS = bitutil.ScanThru(AttValFollow, lex.WS)
    461470                AttListEnd |= AfterWS & AttListDelim
     
    466475        # Mark any "/" characters found as the ends of empty element tags.
    467476        tag_Callouts.EmptyTag_marks = bitutil.Advance(AttListEnd & lex.Slash)
    468        
     477
    469478        # Check for errors.
    470479        ParseError |= tag_Callouts.AttVal_ends & tag_Callouts.AttName_starts # No intervening WS.
     
    476485
    477486        # End Tag Parsing
    478         EndTagEnds = bitutil.ScanThru(bitutil.ScanThru(tag_Callouts.EndTag_marks, tag_Callouts.EndTag_marks | lex.NameScan), lex.WS)
     487
     488        EndTagEnds = bitutil.ScanThru(tag_Callouts.EndTag_marks, tag_Callouts.EndTag_marks | lex.NameScan)
     489        if EndTagEnds & lex.WS:
     490                EndTagEnds = bitutil.ScanThru(EndTagEnds, lex.WS)
    479491        ParseError |= EndTagEnds & ~lex.RAngle
    480         tag_Callouts.error = ParseError
    481                
     492        if ParseError:
     493                error_tracker.NoteError("Tag parsing error found", (ParseError))
     494
     495
    482496        # Attribute value spans
    483497        tag_Callouts.AttVal_spans = tag_Callouts.AttVal_ends - tag_Callouts.AttVal_starts
    484                        
     498
    485499def Parse_refs(lex, scope1, ctCDPI_Callouts, ref_Callouts):
    486500        ref_Callouts.GenRef_starts = 0
     
    490504        ref_Callouts.HexRef_starts = 0
    491505        ref_Callouts.HexRef_ends = 0
    492         ref_Callouts.error = 0
     506        ref_error = 0
    493507
    494508        Ref1 = lex.RefStart &~ ctCDPI_Callouts.CtCDPI_mask
     
    501515                HexRef3 = NumRef3 & lex.x
    502516                ref_Callouts.DecRef_starts = NumRef3 &~ lex.x
    503                 ref_Callouts.HexRef_starts = bitutil.Advance(HexRef3) 
     517                ref_Callouts.HexRef_starts = bitutil.Advance(HexRef3)
    504518                ref_Callouts.GenRef_ends = bitutil.ScanThru(ref_Callouts.GenRef_starts, lex.NameScan)
    505519                ref_Callouts.DecRef_ends = bitutil.ScanThru(ref_Callouts.DecRef_starts, lex.Digit)
     
    512526                ref_ends = ref_Callouts.GenRef_ends | ref_Callouts.DecRef_ends | ref_Callouts.HexRef_ends
    513527                ref_error3 = ref_ends &~ lex.Semicolon
    514                 ref_Callouts.error = ref_error1 | ref_error2 | ref_error3
     528                ref_error = ref_error1 | ref_error2 | ref_error3
     529                if ref_error:
     530                        error_tracker.NoteError("Reference error found", (ref_error))
     531
     532
    515533
    516534def Validate_xml_names(ctCDPI_Callouts, ref_Callouts, tag_Callouts, lex, u8, xml_names, check_streams):
     
    530548        colon2_err = bitutil.ScanThru(local_part_start, lex.NameScan &~ lex.Colon) & lex.Colon
    531549        ncname_err = ncname_stream & lex.Colon
    532         xml_names.namespace_error = void_prefix_err | local_part_err | colon2_err | ncname_err
    533                        
     550        namespace_error = void_prefix_err | local_part_err | colon2_err | ncname_err
     551        if namespace_error:
     552                error_tracker.NoteError("error found", namespace_error)
     553
     554
    534555        check_streams.non_ascii_name_starts = name_start &~lex.ASCII_name_start
    535556        check_streams.non_ascii_names = (name_stream &~ name_start) & ~lex.ASCII_name_char & ~u8.suffix
    536    
     557
    537558def Do_check_streams(ctCDPI_Callouts, tag_Callouts, lex, u8, scope1, ref_Callouts, xml_names, check_streams):
    538559    # Ensure that no occurrence of ]]> occurs outside of markup.
    539560    CD_end_error = ctCDPI_Callouts.CD_end & ~(ctCDPI_Callouts.CtCDPI_mask | tag_Callouts.AttVal_spans)
    540            
     561
    541562    # Consolidate and check for errors
    542     check_streams.error_mask = lex.error & EOF_mask | u8.error | u8.FFFE_FFFF | ctCDPI_Callouts.error | tag_Callouts.error | CD_end_error | ref_Callouts.error | xml_names.namespace_error
    543 
    544     check_streams.tag_marks = tag_Callouts.EmptyTag_marks | tag_Callouts.LAngleFollow | tag_Callouts.AttName_starts
     563    if CD_end_error:
     564                error_tracker.NoteError("Error: ]]> in text", CD_end_error)
     565
     566
     567    check_streams.tag_marks = tag_Callouts.EmptyTag_marks | tag_Callouts.ElemName_starts | tag_Callouts.EndTag_marks | tag_Callouts.AttName_starts
    545568    check_streams.name_follows = tag_Callouts.ElemName_ends | tag_Callouts.AttName_ends
    546569    check_streams.att_refs = tag_Callouts.AttVal_spans & scope1.RefStart
     
    627650    #hash_data.Hash_value = basis_bits.bit_3 ^ basis_bits.bit_5 ^ basis_bits.bit_7
    628651
    629 #def main(basis_bits, lex, u8, scope1, ctCDPI_Callouts, masks, check_streams, tag_Callouts, ref_Callouts, xml_names):
    630652def Main(basis_bits, lex, u8, xml_char, scope1, ctCDPI_Callouts, ref_Callouts, tag_Callouts, masks, xml_names, check_streams, hash_data):
    631653       
  • proto/SymbolTable/parabix2_symtab_pbs_div.py

    r1228 r1232  
    4545  xF4_scope = 0
    4646  xEF_scope = 0
    47  
    48   FFFE_FFFF = 0
    49   error = 0
    5047
    5148class Lex ():
     
    5653        CRLF = 0
    5754        RefStart = 0
    58         Semicolon = 0 
     55        Semicolon = 0
    5956        Colon = 0
    6057        LAngle = 0
     
    7774        Hex = 0
    7875        WS = 0
    79         error = 0
    8076
    8177class Scope1 ():
     
    9793        PI_ends = 0
    9894        CtCDPI_mask = 0
    99         error = 0
    10095
    10196class Ref_Callouts():
     
    106101        HexRef_starts = 0
    107102        HexRef_ends = 0
    108         error = 0
    109 
    110 class Hash_data():
    111         Hash_value = 0
    112103
    113104class Tag_Callouts():
    114105        ElemName_starts = 0
    115106        ElemName_ends = 0
    116         ElemName_ends_1_to_4 = 0
    117         ElemName_ends_5_to_8 = 0
    118         ElemName_ends_9_to_12 = 0
    119         ElemName_ends_13_to_16 = 0
     107        ElemName_ends_1 = 0
     108        ElemName_ends_2 = 0
     109        ElemName_ends_3 = 0
     110        ElemName_ends_4 = 0
     111        ElemName_ends_5 = 0
     112        ElemName_ends_6 = 0
     113        ElemName_ends_7 = 0
     114        ElemName_ends_8 = 0
     115        ElemName_ends_9 = 0
     116        ElemName_ends_10 = 0
     117        ElemName_ends_11 = 0
     118        ElemName_ends_12 = 0
     119        ElemName_ends_13 = 0
     120        ElemName_ends_14 = 0
     121        ElemName_ends_15 = 0
     122        ElemName_ends_16 = 0
    120123        ElemName_ends_17_and_longer = 0
    121124        AttName_starts = 0
     
    126129        EmptyTag_marks = 0
    127130        EndTag_marks = 0
    128         LAngleFollow = 0
    129         error = 0
    130 
    131 class Basis_bits():     
     131
     132class Basis_bits():
    132133        bit_0 = 0
    133134        bit_1 = 0
     
    138139        bit_6 = 0
    139140        bit_7 = 0
    140        
     141
    141142class Check_streams():
    142143        misc_mask = 0
    143144        non_ascii_name_starts = 0
    144145        non_ascii_names = 0
    145         tag_marks = 0
    146         name_follows = 0
    147         att_refs = 0
    148         error_mask = 0
     146        tag_marks = 0
     147        name_follows = 0
     148        att_refs = 0
    149149
    150150class Xml_names():
    151151        namespace_error = 0
    152152
    153 def Classify_bytes_Validate_utf8(basis_bits, lex, u8): 
     153def Classify_bytes_Validate_utf8(basis_bits, lex, u8):
    154154        temp1 = (basis_bits.bit_0 | basis_bits.bit_1);
    155155        temp2 = (basis_bits.bit_2 &~ basis_bits.bit_3);
     
    244244        temp65 = (temp64 & temp60);
    245245        lex.Hex = (temp62 | temp65);
    246         lex.error = x00_x1F &~ lex.WS
    247        
     246        lex_error = x00_x1F &~ lex.WS
     247        if lex_error & EOF_mask:
     248                error_tracker.NoteError("Error: illegal character", lex_error)
     249
     250
    248251        ### Validate_utf8(basis_bits, u8):
    249252        u8.unibyte = (~basis_bits.bit_0);
    250253        u8.suffix = 0
    251         u8.error = 0
    252         u8.FFFE_FFFF = 0
     254        u8_error = 0
     255        u8_FFFE_FFFF = 0
    253256        u8anyscope = 0 #local
    254257        if basis_bits.bit_0:
     
    265268                temp71 = (u8.prefix4 & temp70);
    266269                u8.badprefix = (temp68 | temp71);
    267                 u8.error = u8.badprefix
     270                u8_error = u8.badprefix
    268271                u8.scope22 = bitutil.Advance(u8.prefix2)
    269272                u8anyscope = u8.scope22
     
    295298                        u8lastscope = u8.scope22 | u8.scope33 | u8.scope44
    296299                        u8anyscope = u8lastscope | u8.scope32 | u8.scope42 | u8.scope43
    297                
     300
    298301                        u8error1 = u8.xE0_scope & u8.x80_x9F
    299302                        u8error2 = u8.xED_scope & u8.xA0_xBF
    300303                        u8error3 = u8.xF0_scope & u8.x80_x8F
    301304                        u8error4 = u8.xF4_scope & u8.x90_xBF
    302        
    303                         u8.error |= u8error1 | u8error2 | u8error3 | u8error4
     305
     306                        u8_error |= u8error1 | u8error2 | u8error3 | u8error4
    304307
    305308                        EF_BF_pending = bitutil.Advance(u8.xEF_scope & u8.xBF)
    306309
    307                         u8.FFFE_FFFF = (EF_BF_pending & (u8.xBE | u8.xBF))
    308         u8mismatch = u8anyscope ^ u8.suffix
    309         u8.error |= u8mismatch
    310        
     310                        u8_FFFE_FFFF = (EF_BF_pending & (u8.xBE | u8.xBF))
     311                u8mismatch = u8anyscope ^ u8.suffix
     312                u8_error |= u8mismatch | u8_FFFE_FFFF
     313                if u8_error:
     314                        error_tracker.NoteError("UTF-8 error found", (u8_error))
     315
     316
    311317def Add_scope_streams(lex, scope1):
    312318        #scope1.LAngle = bitutil.Advance(lex.LAngle)
     
    316322        w = lex.Hyphen | lex.QMark
    317323        v1 = bitutil.Advance(v)
    318         w1 = bitutil.Advance(w)
     324        w1 = bitutil.Advance(w)
    319325        scope1.LAngle = v1 &~ w1
    320326        scope1.Hyphen = v1 & w1
    321327        scope1.QMark = w1 &~ v1
     328        scope1.RefStart = 0 # default
    322329
    323330def Parse_CtCDPI(ctCDPI_Callouts, lex, scope1, check_streams):
     
    332339        ctCDPI_Callouts.PI_ends = 0
    333340        ctCDPI_Callouts.CtCDPI_mask = 0
    334         ctCDPI_Callouts.error = 0
     341        ctCDPI_error = 0
    335342        CtCDPI_starts = 0
    336343        Ct_errors = 0
     
    355362                CD_Ct_Cursor = bitutil.Advance(CtCDPI_Cursor & ~PI_Cursor)
    356363                CD_Cursor = CD_Ct_Cursor & lex.LBracket
    357                 Ct_Cursor = CD_Ct_Cursor & lex.Hyphen 
     364                Ct_Cursor = CD_Ct_Cursor & lex.Hyphen
    358365                ctCDPI_Callouts.PI_starts |= PI_Cursor
    359366                ctCDPI_Callouts.CD_starts |= CD_Cursor
    360367                ctCDPI_Callouts.Ct_starts |= Ct_Cursor
    361                 Ct_Cursor = bitutil.Advance(Ct_Cursor) 
    362                 Ct_errors |= Ct_Cursor & ~ lex.Hyphen 
    363                 # Advance twice past <!--, so that we don't treat <!--- 
    364                 # as being a terminated comment.
     368                Ct_Cursor = bitutil.Advance(Ct_Cursor)
     369                Ct_errors |= Ct_Cursor & ~ lex.Hyphen
     370                # Advance twice past <!--, so that we don't treat <!---
     371                # as being a terminated comment.
    365372                Ct_Cursor = bitutil.Advance(bitutil.Advance(Ct_Cursor))
    366373                PI_Cursor = bitutil.Advance(PI_Cursor)
     
    376383                CtCDPI_Cursor = PI_Cursor | CD_Cursor | Ct_Cursor
    377384                CtCDPI_Cursor = bitutil.ScanTo(CtCDPI_Cursor, CtCDPI_start)
    378        
    379                 ctCDPI_Callouts.CtCDPI_mask = bitutil.Advance(ctCDPI_Callouts.CD_ends | ctCDPI_Callouts.Ct_ends | ctCDPI_Callouts.PI_ends) - CtCDPI_starts             
     385
     386                ctCDPI_Callouts.CtCDPI_mask = bitutil.Advance(ctCDPI_Callouts.CD_ends | ctCDPI_Callouts.Ct_ends | ctCDPI_Callouts.PI_ends) - CtCDPI_starts
    380387                #ctCDPI_Callouts.error = Ct_ends & ~lex.RAngle | Ct_starts & ~ lex.Hyphen
    381                 ctCDPI_Callouts.error = Ct_errors | ctCDPI_Callouts.Ct_ends & ~lex.RAngle
    382                 ctCDPI_Callouts.error |= bitutil.Advance(ctCDPI_Callouts.PI_name_ends & ~ lex.WS) & ~ PI_end
    383                 ctCDPI_Callouts.error |= ctCDPI_Callouts.PI_name_starts & ctCDPI_Callouts.PI_name_ends
     388                ctCDPI_error = Ct_errors | ctCDPI_Callouts.Ct_ends & ~lex.RAngle
     389                ctCDPI_error |= bitutil.Advance(ctCDPI_Callouts.PI_name_ends & ~ lex.WS) & ~ PI_end
     390                ctCDPI_error |= ctCDPI_Callouts.PI_name_starts & ctCDPI_Callouts.PI_name_ends
    384391                # If any of the Comment, CDATA or PI markups are unterminated, it is an error.
    385                 ctCDPI_Callouts.error |= ctCDPI_Callouts.CtCDPI_mask &~ EOF_mask
    386                
     392                ctCDPI_error |= ctCDPI_Callouts.CtCDPI_mask &~ EOF_mask
     393
     394        if ctCDPI_error:
     395                error_tracker.NoteError("Error in comment, CDATA or processing instruction syntax", ctCDPI_error)
     396
    387397        check_streams.misc_mask = (lex.WS | lex.LAngle | (bitutil.Advance(ctCDPI_Callouts.Ct_ends | ctCDPI_Callouts.PI_ends) - (ctCDPI_Callouts.Ct_starts | ctCDPI_Callouts.PI_starts)) | CtCDPI_starts) & EOF_mask
    388398
    389399def Parse_tags(lex, scope1, ctCDPI_Callouts, tag_Callouts):
    390400
    391        
     401
    392402        # Delimiters for scans.
    393403        DQuoteDelim = lex.DQuote | lex.LAngle
    394404        SQuoteDelim = lex.SQuote | lex.LAngle
    395405        AttListDelim = lex.Slash | lex.RAngle
    396        
     406
    397407        # Start the parallel parsing by inspecting the character
    398408        # after the opening "<" of a tag.
    399         tag_Callouts.LAngleFollow = scope1.LAngle &~ ctCDPI_Callouts.CtCDPI_mask
    400         tag_Callouts.ElemName_starts = tag_Callouts.LAngleFollow & ~lex.Slash
    401         tag_Callouts.EndTag_marks = tag_Callouts.LAngleFollow & lex.Slash
    402        
     409        LAngleFollow = scope1.LAngle &~ ctCDPI_Callouts.CtCDPI_mask
     410        tag_Callouts.ElemName_starts = LAngleFollow & ~lex.Slash
     411        tag_Callouts.EndTag_marks = LAngleFollow & lex.Slash
     412
    403413        # Start Tag/Empty Element Tag Parsing
    404414
     
    408418        # Mark any occurrences of null names as errors.
    409419        ParseError = tag_Callouts.ElemName_starts & tag_Callouts.ElemName_ends
    410        
     420
    411421        # Initialize the accumulators for attribute name and value positions.
    412         tag_Callouts.AttName_starts = 0 
     422        tag_Callouts.AttName_starts = 0
    413423        tag_Callouts.AttName_ends = 0
    414424        EqToCheck = 0
     
    434444                tag_Callouts.AttName_ends |= AttNameFollow
    435445                # Scan through WS to the expected '=' delimiter.
    436                 EqExpected = bitutil.ScanThru(AttNameFollow, lex.WS)
     446                # EqExpected = bitutil.ScanThru(AttNameFollow, lex.WS)
     447                # But use if test to optimize.
     448                if AttNameFollow & lex.WS:
     449                        EqExpected = bitutil.ScanThru(AttNameFollow, lex.WS)
     450                else: EqExpected = AttNameFollow
    437451                EqToCheck |= EqExpected
    438452                AttValPos = bitutil.ScanThru(EqExpected, EqExpected | lex.WS)
     
    446460                AttValFollow = bitutil.Advance(AttValEnd)
    447461                tag_Callouts.AttVal_ends |= AttValFollow
     462                #  AfterWS = bitutil.ScanThru(AttValFollow, lex.WS)
     463                if AttValFollow & lex.WS:
     464                        AfterWS = bitutil.ScanThru(AttValFollow, lex.WS)
     465                else: AfterWS = AttValFollow
    448466                AfterWS = bitutil.ScanThru(AttValFollow, lex.WS)
    449467                AttListEnd |= AfterWS & AttListDelim
     
    454472        # Mark any "/" characters found as the ends of empty element tags.
    455473        tag_Callouts.EmptyTag_marks = bitutil.Advance(AttListEnd & lex.Slash)
    456        
     474
    457475        # Check for errors.
    458476        ParseError |= tag_Callouts.AttVal_ends & tag_Callouts.AttName_starts # No intervening WS.
     
    464482
    465483        # End Tag Parsing
    466         EndTagEnds = bitutil.ScanThru(bitutil.ScanThru(tag_Callouts.EndTag_marks, tag_Callouts.EndTag_marks | lex.NameScan), lex.WS)
     484
     485        EndTagEnds = bitutil.ScanThru(tag_Callouts.EndTag_marks, tag_Callouts.EndTag_marks | lex.NameScan)
     486        if EndTagEnds & lex.WS:
     487                EndTagEnds = bitutil.ScanThru(EndTagEnds, lex.WS)
    467488        ParseError |= EndTagEnds & ~lex.RAngle
    468         tag_Callouts.error = ParseError
    469                
     489        if ParseError:
     490                error_tracker.NoteError("Tag parsing error found", (ParseError))
     491
     492
    470493        # Attribute value spans
    471494        tag_Callouts.AttVal_spans = tag_Callouts.AttVal_ends - tag_Callouts.AttVal_starts
    472                        
     495
    473496def Parse_refs(lex, scope1, ctCDPI_Callouts, ref_Callouts):
    474497        ref_Callouts.GenRef_starts = 0
     
    478501        ref_Callouts.HexRef_starts = 0
    479502        ref_Callouts.HexRef_ends = 0
    480         ref_Callouts.error = 0
     503        ref_error = 0
    481504
    482505        Ref1 = lex.RefStart &~ ctCDPI_Callouts.CtCDPI_mask
     
    489512                HexRef3 = NumRef3 & lex.x
    490513                ref_Callouts.DecRef_starts = NumRef3 &~ lex.x
    491                 ref_Callouts.HexRef_starts = bitutil.Advance(HexRef3) 
     514                ref_Callouts.HexRef_starts = bitutil.Advance(HexRef3)
    492515                ref_Callouts.GenRef_ends = bitutil.ScanThru(ref_Callouts.GenRef_starts, lex.NameScan)
    493516                ref_Callouts.DecRef_ends = bitutil.ScanThru(ref_Callouts.DecRef_starts, lex.Digit)
     
    500523                ref_ends = ref_Callouts.GenRef_ends | ref_Callouts.DecRef_ends | ref_Callouts.HexRef_ends
    501524                ref_error3 = ref_ends &~ lex.Semicolon
    502                 ref_Callouts.error = ref_error1 | ref_error2 | ref_error3
     525                ref_error = ref_error1 | ref_error2 | ref_error3
     526                if ref_error:
     527                        error_tracker.NoteError("Reference error found", (ref_error))
     528
     529
    503530
    504531def Validate_xml_names(ctCDPI_Callouts, ref_Callouts, tag_Callouts, lex, u8, xml_names, check_streams):
     
    518545        colon2_err = bitutil.ScanThru(local_part_start, lex.NameScan &~ lex.Colon) & lex.Colon
    519546        ncname_err = ncname_stream & lex.Colon
    520         xml_names.namespace_error = void_prefix_err | local_part_err | colon2_err | ncname_err
    521                        
     547        namespace_error = void_prefix_err | local_part_err | colon2_err | ncname_err
     548        if namespace_error:
     549                error_tracker.NoteError("error found", namespace_error)
     550
     551
    522552        check_streams.non_ascii_name_starts = name_start &~lex.ASCII_name_start
    523553        check_streams.non_ascii_names = (name_stream &~ name_start) & ~lex.ASCII_name_char & ~u8.suffix
    524    
     554
    525555def Do_check_streams(ctCDPI_Callouts, tag_Callouts, lex, u8, scope1, ref_Callouts, xml_names, check_streams):
    526556    # Ensure that no occurrence of ]]> occurs outside of markup.
    527557    CD_end_error = ctCDPI_Callouts.CD_end & ~(ctCDPI_Callouts.CtCDPI_mask | tag_Callouts.AttVal_spans)
    528            
     558
    529559    # Consolidate and check for errors
    530     check_streams.error_mask = lex.error & EOF_mask | u8.error | u8.FFFE_FFFF | ctCDPI_Callouts.error | tag_Callouts.error | CD_end_error | ref_Callouts.error | xml_names.namespace_error
    531 
    532     check_streams.tag_marks = tag_Callouts.EmptyTag_marks | tag_Callouts.LAngleFollow | tag_Callouts.AttName_starts
     560    if CD_end_error:
     561                error_tracker.NoteError("Error: ]]> in text", CD_end_error)
     562
     563
     564    check_streams.tag_marks = tag_Callouts.EmptyTag_marks | tag_Callouts.ElemName_starts | tag_Callouts.EndTag_marks | tag_Callouts.AttName_starts
    533565    check_streams.name_follows = tag_Callouts.ElemName_ends | tag_Callouts.AttName_ends
    534566    check_streams.att_refs = tag_Callouts.AttVal_spans & scope1.RefStart
     
    539571    remaining_ends = tag_Callouts.ElemName_ends
    540572    temp = tag_Callouts.ElemName_starts
     573    temp32 = bitutil.Advance32(temp)
    541574
    542575    # Group symbols of length 1
    543     temp = bitutil.Advance(temp)
    544     tag_Callouts.ElemName_ends_1 = temp & remaining_ends
     576    tag_Callouts.ElemName_ends_1 = interpose32(temp, temp32, 1) & remaining_ends
    545577    remaining_ends = remaining_ends & ~tag_Callouts.ElemName_ends_1
    546578
    547579    # Group symbols of length 2
    548     temp = bitutil.Advance(temp)
    549     tag_Callouts.ElemName_ends_2 = temp & remaining_ends
     580    tag_Callouts.ElemName_ends_2 = interpose32(temp, temp32, 2) & remaining_ends
    550581    remaining_ends = remaining_ends & ~tag_Callouts.ElemName_ends_2
    551582
    552583    # Group symbols of length 3
    553     temp = bitutil.Advance(temp)
    554     tag_Callouts.ElemName_ends_3 = temp & remaining_ends
     584    tag_Callouts.ElemName_ends_3 = interpose32(temp, temp32, 3) & remaining_ends
    555585    remaining_ends = remaining_ends & ~tag_Callouts.ElemName_ends_3
    556586
    557587    # Group symbols of length 4
    558     temp = bitutil.Advance(temp)
    559     tag_Callouts.ElemName_ends_4 = temp & remaining_ends
     588    tag_Callouts.ElemName_ends_4 = interpose32(temp, temp32, 4) & remaining_ends
    560589    remaining_ends = remaining_ends & ~tag_Callouts.ElemName_ends_4
    561590
    562591    # Group symbols of length 5
    563     temp = bitutil.Advance(temp)
    564     tag_Callouts.ElemName_ends_5 = temp & remaining_ends
     592    tag_Callouts.ElemName_ends_5 = interpose32(temp, temp32, 5) & remaining_ends
    565593    remaining_ends = remaining_ends & ~tag_Callouts.ElemName_ends_5
    566594
    567595    # Group symbols of length 6
    568     temp = bitutil.Advance(temp)
    569     tag_Callouts.ElemName_ends_6 = temp & remaining_ends
     596    tag_Callouts.ElemName_ends_6 = interpose32(temp, temp32, 6) & remaining_ends
    570597    remaining_ends = remaining_ends & ~tag_Callouts.ElemName_ends_6
    571598
    572599    # Group symbols of length 7
    573     temp = bitutil.Advance(temp)
    574     tag_Callouts.ElemName_ends_7 = temp & remaining_ends
     600    tag_Callouts.ElemName_ends_7 = interpose32(temp, temp32, 7) & remaining_ends
    575601    remaining_ends = remaining_ends & ~tag_Callouts.ElemName_ends_7
    576602
    577603    # Group symbols of length 8
    578     temp = bitutil.Advance(temp)
    579     tag_Callouts.ElemName_ends_8 = temp & remaining_ends
     604    tag_Callouts.ElemName_ends_8 = interpose32(temp, temp32, 8) & remaining_ends
    580605    remaining_ends = remaining_ends & ~tag_Callouts.ElemName_ends_8
    581606
    582607    # Group symbols of length 9
    583     temp = bitutil.Advance(temp)
    584     tag_Callouts.ElemName_ends_9 = temp & remaining_ends
     608    tag_Callouts.ElemName_ends_9 = interpose32(temp, temp32, 9) & remaining_ends
    585609    remaining_ends = remaining_ends & ~tag_Callouts.ElemName_ends_9
    586610
    587611    # Group symbols of length 10
    588     temp = bitutil.Advance(temp)
    589     tag_Callouts.ElemName_ends_10 = temp & remaining_ends
     612    tag_Callouts.ElemName_ends_10 = interpose32(temp, temp32, 10) & remaining_ends
    590613    remaining_ends = remaining_ends & ~tag_Callouts.ElemName_ends_10
    591614
    592615    # Group symbols of length 11
    593     temp = bitutil.Advance(temp)
    594     tag_Callouts.ElemName_ends_11 = temp & remaining_ends
     616    tag_Callouts.ElemName_ends_11 = interpose32(temp, temp32, 11) & remaining_ends
    595617    remaining_ends = remaining_ends & ~tag_Callouts.ElemName_ends_11
    596618
    597619    # Group symbols of length 12
    598     temp = bitutil.Advance(temp)
    599     tag_Callouts.ElemName_ends_12 = temp & remaining_ends
     620    tag_Callouts.ElemName_ends_12 = interpose32(temp, temp32, 12) & remaining_ends
    600621    remaining_ends = remaining_ends & ~tag_Callouts.ElemName_ends_12
    601622
    602623    # Group symbols of length 13
    603     temp = bitutil.Advance(temp)
    604     tag_Callouts.ElemName_ends_13 = temp & remaining_ends
     624    tag_Callouts.ElemName_ends_13 = interpose32(temp, temp32, 13) & remaining_ends
    605625    remaining_ends = remaining_ends & ~tag_Callouts.ElemName_ends_13
    606626
    607627    # Group symbols of length 14
    608     temp = bitutil.Advance(temp)
    609     tag_Callouts.ElemName_ends_14 = temp & remaining_ends
     628    tag_Callouts.ElemName_ends_14 = interpose32(temp, temp32, 14) & remaining_ends
    610629    remaining_ends = remaining_ends & ~tag_Callouts.ElemName_ends_14
    611630
    612631    # Group symbols of length 15
    613     temp = bitutil.Advance(temp)
    614     tag_Callouts.ElemName_ends_15 = temp & remaining_ends
     632    temp15 = interpose32(temp, temp32, 15)
     633    tag_Callouts.ElemName_ends_15 = temp15 & remaining_ends
    615634    remaining_ends = remaining_ends & ~tag_Callouts.ElemName_ends_15
    616635
    617636    # Group symbols of length 16
    618     temp = bitutil.Advance(temp)
     637    temp = bitutil.Advance(temp15)
    619638    tag_Callouts.ElemName_ends_16 = temp & remaining_ends
    620639    remaining_ends = remaining_ends & ~tag_Callouts.ElemName_ends_16
     
    628647    #hash_data.Hash_value = basis_bits.bit_3 ^ basis_bits.bit_5 ^ basis_bits.bit_7
    629648
    630 #def main(basis_bits, lex, u8, scope1, ctCDPI_Callouts, masks, check_streams, tag_Callouts, ref_Callouts, xml_names):
    631649def Main(basis_bits, lex, u8, xml_char, scope1, ctCDPI_Callouts, ref_Callouts, tag_Callouts, masks, xml_names, check_streams, hash_data):
    632        
     650
    633651        # Classify bytes for UTF-8 processing, whitespace and control
    634652        # processing and XML lexical analysis.
     
    637655        # Validate UTF-8 multibyte sequences and determine the UTF-8 scope streams
    638656        # Validate_utf8(basis_bits, u8)
    639                                
     657
    640658        Classify_bytes_Validate_utf8(basis_bits, lex, u8)
    641659
    642660        Add_scope_streams(lex, scope1)
    643    
     661
    644662        # Parse all comments, CDATA sections and processing instructions.
    645663        Parse_CtCDPI(ctCDPI_Callouts, lex, scope1, check_streams)
    646                
     664
    647665        # All remaining '<' must be tag start characters; parse tags.
    648         Parse_tags(lex, scope1, ctCDPI_Callouts, tag_Callouts) 
     666        Parse_tags(lex, scope1, ctCDPI_Callouts, tag_Callouts)
    649667
    650668        # All remaining '&' must be reference start characters; parse them.
    651669        Parse_refs(lex, scope1, ctCDPI_Callouts, ref_Callouts)
    652        
     670
    653671        # Validate XML namespaces and generate bit streams to post validate non-ascii range XML names
    654672        Validate_xml_names(ctCDPI_Callouts, ref_Callouts, tag_Callouts, lex, u8, xml_names, check_streams)
    655    
    656    
     673
     674
    657675        Do_check_streams(ctCDPI_Callouts, tag_Callouts, lex, u8, scope1, ref_Callouts, xml_names, check_streams)
    658676
     
    661679
    662680        Compute_Hash_Value_Bitstream(hash_data, basis_bits);
    663                
  • proto/SymbolTable/parabix2_symtab_pbs_log.py

    r1228 r1232  
    4545  xF4_scope = 0
    4646  xEF_scope = 0
    47  
    48   FFFE_FFFF = 0
    49   error = 0
    5047
    5148class Lex ():
     
    5653        CRLF = 0
    5754        RefStart = 0
    58         Semicolon = 0 
     55        Semicolon = 0
    5956        Colon = 0
    6057        LAngle = 0
     
    7774        Hex = 0
    7875        WS = 0
    79         error = 0
    8076
    8177class Scope1 ():
     
    9793        PI_ends = 0
    9894        CtCDPI_mask = 0
    99         error = 0
    10095
    10196class Ref_Callouts():
     
    106101        HexRef_starts = 0
    107102        HexRef_ends = 0
    108         error = 0
    109103
    110104class Hash_data():
     
    127121        EmptyTag_marks = 0
    128122        EndTag_marks = 0
    129         LAngleFollow = 0
    130         error = 0
    131 
    132 class Basis_bits():     
     123
     124class Basis_bits():
    133125        bit_0 = 0
    134126        bit_1 = 0
     
    139131        bit_6 = 0
    140132        bit_7 = 0
    141        
     133
    142134class Check_streams():
    143135        misc_mask = 0
    144136        non_ascii_name_starts = 0
    145137        non_ascii_names = 0
    146         tag_marks = 0
    147         name_follows = 0
    148         att_refs = 0
    149         error_mask = 0
     138        tag_marks = 0
     139        name_follows = 0
     140        att_refs = 0
    150141
    151142class Xml_names():
    152143        namespace_error = 0
    153144
    154 def Classify_bytes_Validate_utf8(basis_bits, lex, u8): 
     145def Classify_bytes_Validate_utf8(basis_bits, lex, u8):
    155146        temp1 = (basis_bits.bit_0 | basis_bits.bit_1);
    156147        temp2 = (basis_bits.bit_2 &~ basis_bits.bit_3);
     
    245236        temp65 = (temp64 & temp60);
    246237        lex.Hex = (temp62 | temp65);
    247         lex.error = x00_x1F &~ lex.WS
    248        
     238        lex_error = x00_x1F &~ lex.WS
     239        if lex_error & EOF_mask:
     240                error_tracker.NoteError("Error: illegal character", lex_error)
     241
     242
    249243        ### Validate_utf8(basis_bits, u8):
    250244        u8.unibyte = (~basis_bits.bit_0);
    251245        u8.suffix = 0
    252         u8.error = 0
    253         u8.FFFE_FFFF = 0
     246        u8_error = 0
     247        u8_FFFE_FFFF = 0
    254248        u8anyscope = 0 #local
    255249        if basis_bits.bit_0:
     
    266260                temp71 = (u8.prefix4 & temp70);
    267261                u8.badprefix = (temp68 | temp71);
    268                 u8.error = u8.badprefix
     262                u8_error = u8.badprefix
    269263                u8.scope22 = bitutil.Advance(u8.prefix2)
    270264                u8anyscope = u8.scope22
     
    296290                        u8lastscope = u8.scope22 | u8.scope33 | u8.scope44
    297291                        u8anyscope = u8lastscope | u8.scope32 | u8.scope42 | u8.scope43
    298                
     292
    299293                        u8error1 = u8.xE0_scope & u8.x80_x9F
    300294                        u8error2 = u8.xED_scope & u8.xA0_xBF
    301295                        u8error3 = u8.xF0_scope & u8.x80_x8F
    302296                        u8error4 = u8.xF4_scope & u8.x90_xBF
    303        
    304                         u8.error |= u8error1 | u8error2 | u8error3 | u8error4
     297
     298                        u8_error |= u8error1 | u8error2 | u8error3 | u8error4
    305299
    306300                        EF_BF_pending = bitutil.Advance(u8.xEF_scope & u8.xBF)
    307301
    308                         u8.FFFE_FFFF = (EF_BF_pending & (u8.xBE | u8.xBF))
    309         u8mismatch = u8anyscope ^ u8.suffix
    310         u8.error |= u8mismatch
    311        
     302                        u8_FFFE_FFFF = (EF_BF_pending & (u8.xBE | u8.xBF))
     303                u8mismatch = u8anyscope ^ u8.suffix
     304                u8_error |= u8mismatch | u8_FFFE_FFFF
     305                if u8_error:
     306                        error_tracker.NoteError("UTF-8 error found", (u8_error))
     307
     308
    312309def Add_scope_streams(lex, scope1):
    313310        #scope1.LAngle = bitutil.Advance(lex.LAngle)
     
    317314        w = lex.Hyphen | lex.QMark
    318315        v1 = bitutil.Advance(v)
    319         w1 = bitutil.Advance(w)
     316        w1 = bitutil.Advance(w)
    320317        scope1.LAngle = v1 &~ w1
    321318        scope1.Hyphen = v1 & w1
    322319        scope1.QMark = w1 &~ v1
     320        scope1.RefStart = 0 # default
    323321
    324322def Parse_CtCDPI(ctCDPI_Callouts, lex, scope1, check_streams):
     
    333331        ctCDPI_Callouts.PI_ends = 0
    334332        ctCDPI_Callouts.CtCDPI_mask = 0
    335         ctCDPI_Callouts.error = 0
     333        ctCDPI_error = 0
    336334        CtCDPI_starts = 0
    337335        Ct_errors = 0
     
    356354                CD_Ct_Cursor = bitutil.Advance(CtCDPI_Cursor & ~PI_Cursor)
    357355                CD_Cursor = CD_Ct_Cursor & lex.LBracket
    358                 Ct_Cursor = CD_Ct_Cursor & lex.Hyphen 
     356                Ct_Cursor = CD_Ct_Cursor & lex.Hyphen
    359357                ctCDPI_Callouts.PI_starts |= PI_Cursor
    360358                ctCDPI_Callouts.CD_starts |= CD_Cursor
    361359                ctCDPI_Callouts.Ct_starts |= Ct_Cursor
    362                 Ct_Cursor = bitutil.Advance(Ct_Cursor) 
    363                 Ct_errors |= Ct_Cursor & ~ lex.Hyphen 
    364                 # Advance twice past <!--, so that we don't treat <!--- 
    365                 # as being a terminated comment.
     360                Ct_Cursor = bitutil.Advance(Ct_Cursor)
     361                Ct_errors |= Ct_Cursor & ~ lex.Hyphen
     362                # Advance twice past <!--, so that we don't treat <!---
     363                # as being a terminated comment.
    366364                Ct_Cursor = bitutil.Advance(bitutil.Advance(Ct_Cursor))
    367365                PI_Cursor = bitutil.Advance(PI_Cursor)
     
    377375                CtCDPI_Cursor = PI_Cursor | CD_Cursor | Ct_Cursor
    378376                CtCDPI_Cursor = bitutil.ScanTo(CtCDPI_Cursor, CtCDPI_start)
    379        
    380                 ctCDPI_Callouts.CtCDPI_mask = bitutil.Advance(ctCDPI_Callouts.CD_ends | ctCDPI_Callouts.Ct_ends | ctCDPI_Callouts.PI_ends) - CtCDPI_starts             
     377
     378                ctCDPI_Callouts.CtCDPI_mask = bitutil.Advance(ctCDPI_Callouts.CD_ends | ctCDPI_Callouts.Ct_ends | ctCDPI_Callouts.PI_ends) - CtCDPI_starts
    381379                #ctCDPI_Callouts.error = Ct_ends & ~lex.RAngle | Ct_starts & ~ lex.Hyphen
    382                 ctCDPI_Callouts.error = Ct_errors | ctCDPI_Callouts.Ct_ends & ~lex.RAngle
    383                 ctCDPI_Callouts.error |= bitutil.Advance(ctCDPI_Callouts.PI_name_ends & ~ lex.WS) & ~ PI_end
    384                 ctCDPI_Callouts.error |= ctCDPI_Callouts.PI_name_starts & ctCDPI_Callouts.PI_name_ends
     380                ctCDPI_error = Ct_errors | ctCDPI_Callouts.Ct_ends & ~lex.RAngle
     381                ctCDPI_error |= bitutil.Advance(ctCDPI_Callouts.PI_name_ends & ~ lex.WS) & ~ PI_end
     382                ctCDPI_error |= ctCDPI_Callouts.PI_name_starts & ctCDPI_Callouts.PI_name_ends
    385383                # If any of the Comment, CDATA or PI markups are unterminated, it is an error.
    386                 ctCDPI_Callouts.error |= ctCDPI_Callouts.CtCDPI_mask &~ EOF_mask
    387                
     384                ctCDPI_error |= ctCDPI_Callouts.CtCDPI_mask &~ EOF_mask
     385
     386        if ctCDPI_error:
     387                error_tracker.NoteError("Error in comment, CDATA or processing instruction syntax", ctCDPI_error)
     388
    388389        check_streams.misc_mask = (lex.WS | lex.LAngle | (bitutil.Advance(ctCDPI_Callouts.Ct_ends | ctCDPI_Callouts.PI_ends) - (ctCDPI_Callouts.Ct_starts | ctCDPI_Callouts.PI_starts)) | CtCDPI_starts) & EOF_mask
    389390
    390391def Parse_tags(lex, scope1, ctCDPI_Callouts, tag_Callouts):
    391392
    392        
     393
    393394        # Delimiters for scans.
    394395        DQuoteDelim = lex.DQuote | lex.LAngle
    395396        SQuoteDelim = lex.SQuote | lex.LAngle
    396397        AttListDelim = lex.Slash | lex.RAngle
    397        
     398
    398399        # Start the parallel parsing by inspecting the character
    399400        # after the opening "<" of a tag.
    400         tag_Callouts.LAngleFollow = scope1.LAngle &~ ctCDPI_Callouts.CtCDPI_mask
    401         tag_Callouts.ElemName_starts = tag_Callouts.LAngleFollow & ~lex.Slash
    402         tag_Callouts.EndTag_marks = tag_Callouts.LAngleFollow & lex.Slash
    403        
     401        LAngleFollow = scope1.LAngle &~ ctCDPI_Callouts.CtCDPI_mask
     402        tag_Callouts.ElemName_starts = LAngleFollow & ~lex.Slash
     403        tag_Callouts.EndTag_marks = LAngleFollow & lex.Slash
     404
    404405        # Start Tag/Empty Element Tag Parsing
    405406
     
    409410        # Mark any occurrences of null names as errors.
    410411        ParseError = tag_Callouts.ElemName_starts & tag_Callouts.ElemName_ends
    411        
     412
    412413        # Initialize the accumulators for attribute name and value positions.
    413         tag_Callouts.AttName_starts = 0 
     414        tag_Callouts.AttName_starts = 0
    414415        tag_Callouts.AttName_ends = 0
    415416        EqToCheck = 0
     
    435436                tag_Callouts.AttName_ends |= AttNameFollow
    436437                # Scan through WS to the expected '=' delimiter.
    437                 EqExpected = bitutil.ScanThru(AttNameFollow, lex.WS)
     438                # EqExpected = bitutil.ScanThru(AttNameFollow, lex.WS)
     439                # But use if test to optimize.
     440                if AttNameFollow & lex.WS:
     441                        EqExpected = bitutil.ScanThru(AttNameFollow, lex.WS)
     442                else: EqExpected = AttNameFollow
    438443                EqToCheck |= EqExpected
    439444                AttValPos = bitutil.ScanThru(EqExpected, EqExpected | lex.WS)
     
    447452                AttValFollow = bitutil.Advance(AttValEnd)
    448453                tag_Callouts.AttVal_ends |= AttValFollow
     454                #  AfterWS = bitutil.ScanThru(AttValFollow, lex.WS)
     455                if AttValFollow & lex.WS:
     456                        AfterWS = bitutil.ScanThru(AttValFollow, lex.WS)
     457                else: AfterWS = AttValFollow
    449458                AfterWS = bitutil.ScanThru(AttValFollow, lex.WS)
    450459                AttListEnd |= AfterWS & AttListDelim
     
    455464        # Mark any "/" characters found as the ends of empty element tags.
    456465        tag_Callouts.EmptyTag_marks = bitutil.Advance(AttListEnd & lex.Slash)
    457        
     466
    458467        # Check for errors.
    459468        ParseError |= tag_Callouts.AttVal_ends & tag_Callouts.AttName_starts # No intervening WS.
     
    465474
    466475        # End Tag Parsing
    467         EndTagEnds = bitutil.ScanThru(bitutil.ScanThru(tag_Callouts.EndTag_marks, tag_Callouts.EndTag_marks | lex.NameScan), lex.WS)
     476
     477        EndTagEnds = bitutil.ScanThru(tag_Callouts.EndTag_marks, tag_Callouts.EndTag_marks | lex.NameScan)
     478        if EndTagEnds & lex.WS:
     479                EndTagEnds = bitutil.ScanThru(EndTagEnds, lex.WS)
    468480        ParseError |= EndTagEnds & ~lex.RAngle
    469         tag_Callouts.error = ParseError
    470                
     481        if ParseError:
     482                error_tracker.NoteError("Tag parsing error found", (ParseError))
     483
     484
    471485        # Attribute value spans
    472486        tag_Callouts.AttVal_spans = tag_Callouts.AttVal_ends - tag_Callouts.AttVal_starts
    473                        
     487
    474488def Parse_refs(lex, scope1, ctCDPI_Callouts, ref_Callouts):
    475489        ref_Callouts.GenRef_starts = 0
     
    479493        ref_Callouts.HexRef_starts = 0
    480494        ref_Callouts.HexRef_ends = 0
    481         ref_Callouts.error = 0
     495        ref_error = 0
    482496
    483497        Ref1 = lex.RefStart &~ ctCDPI_Callouts.CtCDPI_mask
     
    490504                HexRef3 = NumRef3 & lex.x
    491505                ref_Callouts.DecRef_starts = NumRef3 &~ lex.x
    492                 ref_Callouts.HexRef_starts = bitutil.Advance(HexRef3) 
     506                ref_Callouts.HexRef_starts = bitutil.Advance(HexRef3)
    493507                ref_Callouts.GenRef_ends = bitutil.ScanThru(ref_Callouts.GenRef_starts, lex.NameScan)
    494508                ref_Callouts.DecRef_ends = bitutil.ScanThru(ref_Callouts.DecRef_starts, lex.Digit)
     
    501515                ref_ends = ref_Callouts.GenRef_ends | ref_Callouts.DecRef_ends | ref_Callouts.HexRef_ends
    502516                ref_error3 = ref_ends &~ lex.Semicolon
    503                 ref_Callouts.error = ref_error1 | ref_error2 | ref_error3
     517                ref_error = ref_error1 | ref_error2 | ref_error3
     518                if ref_error:
     519                        error_tracker.NoteError("Reference error found", (ref_error))
     520
     521
    504522
    505523def Validate_xml_names(ctCDPI_Callouts, ref_Callouts, tag_Callouts, lex, u8, xml_names, check_streams):
     
    519537        colon2_err = bitutil.ScanThru(local_part_start, lex.NameScan &~ lex.Colon) & lex.Colon
    520538        ncname_err = ncname_stream & lex.Colon
    521         xml_names.namespace_error = void_prefix_err | local_part_err | colon2_err | ncname_err
    522                        
     539        namespace_error = void_prefix_err | local_part_err | colon2_err | ncname_err
     540        if namespace_error:
     541                error_tracker.NoteError("error found", namespace_error)
     542
     543
    523544        check_streams.non_ascii_name_starts = name_start &~lex.ASCII_name_start
    524545        check_streams.non_ascii_names = (name_stream &~ name_start) & ~lex.ASCII_name_char & ~u8.suffix
    525    
     546
    526547def Do_check_streams(ctCDPI_Callouts, tag_Callouts, lex, u8, scope1, ref_Callouts, xml_names, check_streams):
    527548    # Ensure that no occurrence of ]]> occurs outside of markup.
    528549    CD_end_error = ctCDPI_Callouts.CD_end & ~(ctCDPI_Callouts.CtCDPI_mask | tag_Callouts.AttVal_spans)
    529            
     550
    530551    # Consolidate and check for errors
    531     check_streams.error_mask = lex.error & EOF_mask | u8.error | u8.FFFE_FFFF | ctCDPI_Callouts.error | tag_Callouts.error | CD_end_error | ref_Callouts.error | xml_names.namespace_error
    532 
    533     check_streams.tag_marks = tag_Callouts.EmptyTag_marks | tag_Callouts.LAngleFollow | tag_Callouts.AttName_starts
     552    if CD_end_error:
     553                error_tracker.NoteError("Error: ]]> in text", CD_end_error)
     554
     555
     556    check_streams.tag_marks = tag_Callouts.EmptyTag_marks | tag_Callouts.ElemName_starts | tag_Callouts.EndTag_marks | tag_Callouts.AttName_starts
    534557    check_streams.name_follows = tag_Callouts.ElemName_ends | tag_Callouts.AttName_ends
    535558    check_streams.att_refs = tag_Callouts.AttVal_spans & scope1.RefStart
  • proto/SymbolTable/symtab_ls_template.cpp

    r1228 r1232  
    5656char * source;
    5757LineColTracker tracker;
     58
     59static inline void ReportError(const char * error_msg, int error_pos_in_block) {
     60  int error_line, error_column;
     61  tracker.get_Line_and_Column(error_pos_in_block, error_line, error_column);
     62  fprintf(stderr, "%s at line %i, column %i\n", error_msg, error_line, error_column);
     63}
     64
     65class ErrorTracker {
     66public:
     67    ErrorTracker() { noted_pos_in_block = -1;}
     68
     69    inline void NoteError(const char * error_msg, BitBlock err_strm) {
     70      int pos_in_block = count_forward_zeroes(err_strm);
     71      if ((noted_pos_in_block == -1) || (noted_pos_in_block > pos_in_block)) {
     72        noted_pos_in_block = pos_in_block;
     73        noted_error = error_msg;
     74      }
     75    }
     76
     77    inline void If_Error_Report_First() {
     78      if (noted_pos_in_block > -1) {
     79              int error_line, error_column;
     80              ReportError(noted_error, noted_pos_in_block);
     81              exit(-1);
     82      }
     83    }
     84
     85private:
     86  const char * noted_error;
     87  int noted_pos_in_block;
     88};
     89
     90
    5891TagMatcher matcher;
    5992BitBlock EOF_mask = simd_const_1(1);
     93
     94ErrorTracker error_tracker;
    6095
    6196queue <size_t> elem_starts_buf;
     
    76111        }
    77112        return 0;
    78 }
    79 
    80 
    81 static inline void ReportError(const char * error_msg, int error_pos_in_block) {
    82   int error_line, error_column;
    83   tracker.get_Line_and_Column(error_pos_in_block, error_line, error_column);
    84   fprintf(stderr, "%s at line %i, column %i\n", error_msg, error_line, error_column);
    85113}
    86114
     
    319347                }
    320348
    321                 if (bitblock_has_bit(check_streams.error_mask)) {
    322                   int errpos = count_forward_zeroes(check_streams.error_mask);
    323                   ReportError("error found", errpos);
    324               exit(-1);
    325                 }
     349                error_tracker.If_Error_Report_First();
    326350
    327351                matcher.store_streams(check_streams.tag_marks, check_streams.name_follows, check_streams.misc_mask, chars_avail);
  • proto/SymbolTable/symtab_pbgs_log_template.cpp

    r1231 r1232  
    5858char * source;
    5959LineColTracker tracker;
     60
     61static inline void ReportError(const char * error_msg, int error_pos_in_block) {
     62  int error_line, error_column;
     63  tracker.get_Line_and_Column(error_pos_in_block, error_line, error_column);
     64  fprintf(stderr, "%s at line %i, column %i\n", error_msg, error_line, error_column);
     65}
     66
     67class ErrorTracker {
     68public:
     69    ErrorTracker() { noted_pos_in_block = -1;}
     70
     71    inline void NoteError(const char * error_msg, BitBlock err_strm) {
     72      int pos_in_block = count_forward_zeroes(err_strm);
     73      if ((noted_pos_in_block == -1) || (noted_pos_in_block > pos_in_block)) {
     74        noted_pos_in_block = pos_in_block;
     75        noted_error = error_msg;
     76      }
     77    }
     78
     79    inline void If_Error_Report_First() {
     80      if (noted_pos_in_block > -1) {
     81              int error_line, error_column;
     82              ReportError(noted_error, noted_pos_in_block);
     83              exit(-1);
     84      }
     85    }
     86
     87private:
     88  const char * noted_error;
     89  int noted_pos_in_block;
     90};
     91
     92
    6093TagMatcher matcher;
    6194BitBlock EOF_mask = simd_const_1(1);
     95
     96ErrorTracker error_tracker;
     97
    6298BitBlock elem_starts;
    6399int previous_block_last_elem_start;
     
    291327    }
    292328    return 0;
    293 }
    294 
    295 static inline void ReportError(const char * error_msg, int error_pos_in_block) {
    296   int error_line, error_column;
    297   tracker.get_Line_and_Column(error_pos_in_block, error_line, error_column);
    298   fprintf(stderr, "%s at line %i, column %i\n", error_msg, error_line, error_column);
    299329}
    300330
     
    523553    }
    524554
    525     if (bitblock_has_bit(check_streams.error_mask)) {
    526       int errpos = count_forward_zeroes(check_streams.error_mask);
    527       ReportError("error found", errpos);
    528         exit(-1);
    529     }
     555    error_tracker.If_Error_Report_First();
    530556
    531557    matcher.store_streams(check_streams.tag_marks, check_streams.name_follows, check_streams.misc_mask, chars_avail);
  • proto/SymbolTable/symtab_pbgs_template.cpp

    r1231 r1232  
    5656char * source;
    5757LineColTracker tracker;
     58
     59static inline void ReportError(const char * error_msg, int error_pos_in_block) {
     60  int error_line, error_column;
     61  tracker.get_Line_and_Column(error_pos_in_block, error_line, error_column);
     62  fprintf(stderr, "%s at line %i, column %i\n", error_msg, error_line, error_column);
     63}
     64
     65class ErrorTracker {
     66public:
     67    ErrorTracker() { noted_pos_in_block = -1;}
     68
     69    inline void NoteError(const char * error_msg, BitBlock err_strm) {
     70      int pos_in_block = count_forward_zeroes(err_strm);
     71      if ((noted_pos_in_block == -1) || (noted_pos_in_block > pos_in_block)) {
     72        noted_pos_in_block = pos_in_block;
     73        noted_error = error_msg;
     74      }
     75    }
     76
     77    inline void If_Error_Report_First() {
     78      if (noted_pos_in_block > -1) {
     79              int error_line, error_column;
     80              ReportError(noted_error, noted_pos_in_block);
     81              exit(-1);
     82      }
     83    }
     84
     85private:
     86  const char * noted_error;
     87  int noted_pos_in_block;
     88};
     89
     90
    5891TagMatcher matcher;
    5992BitBlock EOF_mask = simd_const_1(1);
     93
     94ErrorTracker error_tracker;
    6095BitBlock elem_starts;
    6196int previous_block_last_elem_start;
     
    73108    {
    74109        // sizeof (BitBlock)*8 - cbzl( s & ~(~0 << pos)) - 1;
    75         return sizeof(BitBlock)*8 - count_backward_zeroes (temp) - 1;
     110        return BLOCK_SIZE - count_backward_zeroes (temp) - 1;
    76111    }
    77112    else
     
    129164    int gid = 0;
    130165
    131     if (lgth > 32)
    132     {
    133         gid = pbgs_symbol_table.Lookup_or_Insert_Name_32(source + start, hashvalue, lgth);
    134     }
    135     else
     166//    if (lgth < 32)
     167//    {
     168//      gid = pbgs_symbol_table.Lookup_or_Insert_Name_32(source + start, hashvalue, lgth);
     169//    }
     170//    else
    136171    {
    137172        gid = pbgs_symbol_table.Lookup_or_Insert_Name(source + start, hashvalue, lgth);
     
    161196    }
    162197    return 0;
    163 }
    164 
    165 
    166 static inline void ReportError(const char * error_msg, int error_pos_in_block) {
    167   int error_line, error_column;
    168   tracker.get_Line_and_Column(error_pos_in_block, error_line, error_column);
    169   fprintf(stderr, "%s at line %i, column %i\n", error_msg, error_line, error_column);
    170198}
    171199
     
    449477    }
    450478
    451     if (bitblock_has_bit(check_streams.error_mask)) {
    452       int errpos = count_forward_zeroes(check_streams.error_mask);
    453       ReportError("error found", errpos);
    454         exit(-1);
    455     }
     479    error_tracker.If_Error_Report_First();
    456480
    457481    matcher.store_streams(check_streams.tag_marks, check_streams.name_follows, check_streams.misc_mask, chars_avail);
  • proto/SymbolTable/symtab_stl_template.cpp

    r1228 r1232  
    5555char * source;
    5656LineColTracker tracker;
     57
     58static inline void ReportError(const char * error_msg, int error_pos_in_block) {
     59  int error_line, error_column;
     60  tracker.get_Line_and_Column(error_pos_in_block, error_line, error_column);
     61  fprintf(stderr, "%s at line %i, column %i\n", error_msg, error_line, error_column);
     62}
     63
     64class ErrorTracker {
     65public:
     66    ErrorTracker() { noted_pos_in_block = -1;}
     67
     68    inline void NoteError(const char * error_msg, BitBlock err_strm) {
     69      int pos_in_block = count_forward_zeroes(err_strm);
     70      if ((noted_pos_in_block == -1) || (noted_pos_in_block > pos_in_block)) {
     71        noted_pos_in_block = pos_in_block;
     72        noted_error = error_msg;
     73      }
     74    }
     75
     76    inline void If_Error_Report_First() {
     77      if (noted_pos_in_block > -1) {
     78              int error_line, error_column;
     79              ReportError(noted_error, noted_pos_in_block);
     80              exit(-1);
     81      }
     82    }
     83
     84private:
     85  const char * noted_error;
     86  int noted_pos_in_block;
     87};
     88
     89
    5790TagMatcher matcher;
    5891BitBlock EOF_mask = simd_const_1(1);
     92
     93ErrorTracker error_tracker;
    5994
    6095queue <size_t> elem_starts_buf;
     
    76111        }
    77112        return 0;
    78 }
    79 
    80 
    81 static inline void ReportError(const char * error_msg, int error_pos_in_block) {
    82   int error_line, error_column;
    83   tracker.get_Line_and_Column(error_pos_in_block, error_line, error_column);
    84   fprintf(stderr, "%s at line %i, column %i\n", error_msg, error_line, error_column);
    85113}
    86114
     
    319347                }
    320348
    321                 if (bitblock_has_bit(check_streams.att_refs)){
     349                if (bitblock_has_bit(check_streams.att_refs)){
    322350                  StreamScan((ScanBlock *) &check_streams.att_refs, sizeof(BitBlock)/sizeof(ScanBlock), AttRef_check);
    323351                }
    324352
    325                 if (bitblock_has_bit(check_streams.error_mask)) {
    326                   int errpos = count_forward_zeroes(check_streams.error_mask);
    327                   ReportError("error found", errpos);
    328               exit(-1);
    329                 }
    330 
    331                 matcher.store_streams(check_streams.tag_marks, check_streams.name_follows, check_streams.misc_mask, chars_avail);
     353                error_tracker.If_Error_Report_First();
     354
     355                matcher.store_streams(check_streams.tag_marks, check_streams.name_follows, check_streams.misc_mask, chars_avail);
    332356                tracker.AdvanceBlock();
    333357}
  • proto/parabix2/pablo_template.cpp

    r1230 r1232  
    286286                error_tracker.If_Error_Report_First();
    287287
    288                 matcher.store_streams(check_streams.tag_marks, check_streams.name_follows, check_streams.misc_mask, chars_avail);
     288                matcher.store_streams(check_streams.tag_marks, check_streams.name_follows, check_streams.misc_mask, chars_avail);
    289289                tracker.AdvanceBlock();
    290290}
  • proto/parabix2/parabix2_pablo.py

    r1230 r1232  
    550550   
    551551   
    552 #def main(basis_bits, lex, u8, scope1, ctCDPI_Callouts, masks, check_streams, tag_Callouts, ref_Callouts, xml_names):
    553552def Main(basis_bits, lex, u8, xml_char, scope1, ctCDPI_Callouts, ref_Callouts, tag_Callouts, masks, xml_names, check_streams): 
    554553       
Note: See TracChangeset for help on using the changeset viewer.