Ignore:
Timestamp:
Jul 28, 2011, 3:01:53 PM (8 years ago)
Author:
vla24
Message:

Updated symbol table implementation.

File:
1 edited

Legend:

Unmodified
Added
Removed
  • proto/SymbolTable/parabix2_symtab_pbs_adv.py

    r1228 r1232  
    4545  xF4_scope = 0
    4646  xEF_scope = 0
    47  
    48   FFFE_FFFF = 0
    49   error = 0
    5047
    5148class Lex ():
     
    7774        Hex = 0
    7875        WS = 0
    79         error = 0
    8076
    8177class Scope1 ():
     
    9793        PI_ends = 0
    9894        CtCDPI_mask = 0
    99         error = 0
    10095
    10196class Ref_Callouts():
     
    106101        HexRef_starts = 0
    107102        HexRef_ends = 0
    108         error = 0
    109103
    110104class Hash_data():
     
    138132        EmptyTag_marks = 0
    139133        EndTag_marks = 0
    140         LAngleFollow = 0
    141         error = 0
    142 
    143 class Basis_bits():     
     134
     135class Basis_bits():
    144136        bit_0 = 0
    145137        bit_1 = 0
     
    150142        bit_6 = 0
    151143        bit_7 = 0
    152        
     144
    153145class Check_streams():
    154146        misc_mask = 0
    155147        non_ascii_name_starts = 0
    156148        non_ascii_names = 0
    157         tag_marks = 0
    158         name_follows = 0
    159         att_refs = 0
    160         error_mask = 0
     149        tag_marks = 0
     150        name_follows = 0
     151        att_refs = 0
    161152
    162153class Xml_names():
    163154        namespace_error = 0
    164155
    165 def Classify_bytes_Validate_utf8(basis_bits, lex, u8): 
     156def Classify_bytes_Validate_utf8(basis_bits, lex, u8):
    166157        temp1 = (basis_bits.bit_0 | basis_bits.bit_1);
    167158        temp2 = (basis_bits.bit_2 &~ basis_bits.bit_3);
     
    256247        temp65 = (temp64 & temp60);
    257248        lex.Hex = (temp62 | temp65);
    258         lex.error = x00_x1F &~ lex.WS
    259        
     249        lex_error = x00_x1F &~ lex.WS
     250        if lex_error & EOF_mask:
     251                error_tracker.NoteError("Error: illegal character", lex_error)
     252
     253
    260254        ### Validate_utf8(basis_bits, u8):
    261255        u8.unibyte = (~basis_bits.bit_0);
    262256        u8.suffix = 0
    263         u8.error = 0
    264         u8.FFFE_FFFF = 0
     257        u8_error = 0
     258        u8_FFFE_FFFF = 0
    265259        u8anyscope = 0 #local
    266260        if basis_bits.bit_0:
     
    277271                temp71 = (u8.prefix4 & temp70);
    278272                u8.badprefix = (temp68 | temp71);
    279                 u8.error = u8.badprefix
     273                u8_error = u8.badprefix
    280274                u8.scope22 = bitutil.Advance(u8.prefix2)
    281275                u8anyscope = u8.scope22
     
    307301                        u8lastscope = u8.scope22 | u8.scope33 | u8.scope44
    308302                        u8anyscope = u8lastscope | u8.scope32 | u8.scope42 | u8.scope43
    309                
     303
    310304                        u8error1 = u8.xE0_scope & u8.x80_x9F
    311305                        u8error2 = u8.xED_scope & u8.xA0_xBF
    312306                        u8error3 = u8.xF0_scope & u8.x80_x8F
    313307                        u8error4 = u8.xF4_scope & u8.x90_xBF
    314        
    315                         u8.error |= u8error1 | u8error2 | u8error3 | u8error4
     308
     309                        u8_error |= u8error1 | u8error2 | u8error3 | u8error4
    316310
    317311                        EF_BF_pending = bitutil.Advance(u8.xEF_scope & u8.xBF)
    318312
    319                         u8.FFFE_FFFF = (EF_BF_pending & (u8.xBE | u8.xBF))
    320         u8mismatch = u8anyscope ^ u8.suffix
    321         u8.error |= u8mismatch
    322        
     313                        u8_FFFE_FFFF = (EF_BF_pending & (u8.xBE | u8.xBF))
     314                u8mismatch = u8anyscope ^ u8.suffix
     315                u8_error |= u8mismatch | u8_FFFE_FFFF
     316                if u8_error:
     317                        error_tracker.NoteError("UTF-8 error found", (u8_error))
     318
     319
    323320def Add_scope_streams(lex, scope1):
    324321        #scope1.LAngle = bitutil.Advance(lex.LAngle)
     
    328325        w = lex.Hyphen | lex.QMark
    329326        v1 = bitutil.Advance(v)
    330         w1 = bitutil.Advance(w)
     327        w1 = bitutil.Advance(w)
    331328        scope1.LAngle = v1 &~ w1
    332329        scope1.Hyphen = v1 & w1
    333330        scope1.QMark = w1 &~ v1
     331        scope1.RefStart = 0 # default
    334332
    335333def Parse_CtCDPI(ctCDPI_Callouts, lex, scope1, check_streams):
     
    344342        ctCDPI_Callouts.PI_ends = 0
    345343        ctCDPI_Callouts.CtCDPI_mask = 0
    346         ctCDPI_Callouts.error = 0
     344        ctCDPI_error = 0
    347345        CtCDPI_starts = 0
    348346        Ct_errors = 0
     
    367365                CD_Ct_Cursor = bitutil.Advance(CtCDPI_Cursor & ~PI_Cursor)
    368366                CD_Cursor = CD_Ct_Cursor & lex.LBracket
    369                 Ct_Cursor = CD_Ct_Cursor & lex.Hyphen 
     367                Ct_Cursor = CD_Ct_Cursor & lex.Hyphen
    370368                ctCDPI_Callouts.PI_starts |= PI_Cursor
    371369                ctCDPI_Callouts.CD_starts |= CD_Cursor
    372370                ctCDPI_Callouts.Ct_starts |= Ct_Cursor
    373                 Ct_Cursor = bitutil.Advance(Ct_Cursor) 
    374                 Ct_errors |= Ct_Cursor & ~ lex.Hyphen 
    375                 # Advance twice past <!--, so that we don't treat <!--- 
    376                 # as being a terminated comment.
     371                Ct_Cursor = bitutil.Advance(Ct_Cursor)
     372                Ct_errors |= Ct_Cursor & ~ lex.Hyphen
     373                # Advance twice past <!--, so that we don't treat <!---
     374                # as being a terminated comment.
    377375                Ct_Cursor = bitutil.Advance(bitutil.Advance(Ct_Cursor))
    378376                PI_Cursor = bitutil.Advance(PI_Cursor)
     
    388386                CtCDPI_Cursor = PI_Cursor | CD_Cursor | Ct_Cursor
    389387                CtCDPI_Cursor = bitutil.ScanTo(CtCDPI_Cursor, CtCDPI_start)
    390        
    391                 ctCDPI_Callouts.CtCDPI_mask = bitutil.Advance(ctCDPI_Callouts.CD_ends | ctCDPI_Callouts.Ct_ends | ctCDPI_Callouts.PI_ends) - CtCDPI_starts             
     388
     389                ctCDPI_Callouts.CtCDPI_mask = bitutil.Advance(ctCDPI_Callouts.CD_ends | ctCDPI_Callouts.Ct_ends | ctCDPI_Callouts.PI_ends) - CtCDPI_starts
    392390                #ctCDPI_Callouts.error = Ct_ends & ~lex.RAngle | Ct_starts & ~ lex.Hyphen
    393                 ctCDPI_Callouts.error = Ct_errors | ctCDPI_Callouts.Ct_ends & ~lex.RAngle
    394                 ctCDPI_Callouts.error |= bitutil.Advance(ctCDPI_Callouts.PI_name_ends & ~ lex.WS) & ~ PI_end
    395                 ctCDPI_Callouts.error |= ctCDPI_Callouts.PI_name_starts & ctCDPI_Callouts.PI_name_ends
     391                ctCDPI_error = Ct_errors | ctCDPI_Callouts.Ct_ends & ~lex.RAngle
     392                ctCDPI_error |= bitutil.Advance(ctCDPI_Callouts.PI_name_ends & ~ lex.WS) & ~ PI_end
     393                ctCDPI_error |= ctCDPI_Callouts.PI_name_starts & ctCDPI_Callouts.PI_name_ends
    396394                # If any of the Comment, CDATA or PI markups are unterminated, it is an error.
    397                 ctCDPI_Callouts.error |= ctCDPI_Callouts.CtCDPI_mask &~ EOF_mask
    398                
     395                ctCDPI_error |= ctCDPI_Callouts.CtCDPI_mask &~ EOF_mask
     396
     397        if ctCDPI_error:
     398                error_tracker.NoteError("Error in comment, CDATA or processing instruction syntax", ctCDPI_error)
     399
    399400        check_streams.misc_mask = (lex.WS | lex.LAngle | (bitutil.Advance(ctCDPI_Callouts.Ct_ends | ctCDPI_Callouts.PI_ends) - (ctCDPI_Callouts.Ct_starts | ctCDPI_Callouts.PI_starts)) | CtCDPI_starts) & EOF_mask
    400401
    401402def Parse_tags(lex, scope1, ctCDPI_Callouts, tag_Callouts):
    402403
    403        
     404
    404405        # Delimiters for scans.
    405406        DQuoteDelim = lex.DQuote | lex.LAngle
    406407        SQuoteDelim = lex.SQuote | lex.LAngle
    407408        AttListDelim = lex.Slash | lex.RAngle
    408        
     409
    409410        # Start the parallel parsing by inspecting the character
    410411        # after the opening "<" of a tag.
    411         tag_Callouts.LAngleFollow = scope1.LAngle &~ ctCDPI_Callouts.CtCDPI_mask
    412         tag_Callouts.ElemName_starts = tag_Callouts.LAngleFollow & ~lex.Slash
    413         tag_Callouts.EndTag_marks = tag_Callouts.LAngleFollow & lex.Slash
    414        
     412        LAngleFollow = scope1.LAngle &~ ctCDPI_Callouts.CtCDPI_mask
     413        tag_Callouts.ElemName_starts = LAngleFollow & ~lex.Slash
     414        tag_Callouts.EndTag_marks = LAngleFollow & lex.Slash
     415
    415416        # Start Tag/Empty Element Tag Parsing
    416417
     
    420421        # Mark any occurrences of null names as errors.
    421422        ParseError = tag_Callouts.ElemName_starts & tag_Callouts.ElemName_ends
    422        
     423
    423424        # Initialize the accumulators for attribute name and value positions.
    424         tag_Callouts.AttName_starts = 0 
     425        tag_Callouts.AttName_starts = 0
    425426        tag_Callouts.AttName_ends = 0
    426427        EqToCheck = 0
     
    446447                tag_Callouts.AttName_ends |= AttNameFollow
    447448                # Scan through WS to the expected '=' delimiter.
    448                 EqExpected = bitutil.ScanThru(AttNameFollow, lex.WS)
     449                # EqExpected = bitutil.ScanThru(AttNameFollow, lex.WS)
     450                # But use if test to optimize.
     451                if AttNameFollow & lex.WS:
     452                        EqExpected = bitutil.ScanThru(AttNameFollow, lex.WS)
     453                else: EqExpected = AttNameFollow
    449454                EqToCheck |= EqExpected
    450455                AttValPos = bitutil.ScanThru(EqExpected, EqExpected | lex.WS)
     
    458463                AttValFollow = bitutil.Advance(AttValEnd)
    459464                tag_Callouts.AttVal_ends |= AttValFollow
     465                #  AfterWS = bitutil.ScanThru(AttValFollow, lex.WS)
     466                if AttValFollow & lex.WS:
     467                        AfterWS = bitutil.ScanThru(AttValFollow, lex.WS)
     468                else: AfterWS = AttValFollow
    460469                AfterWS = bitutil.ScanThru(AttValFollow, lex.WS)
    461470                AttListEnd |= AfterWS & AttListDelim
     
    466475        # Mark any "/" characters found as the ends of empty element tags.
    467476        tag_Callouts.EmptyTag_marks = bitutil.Advance(AttListEnd & lex.Slash)
    468        
     477
    469478        # Check for errors.
    470479        ParseError |= tag_Callouts.AttVal_ends & tag_Callouts.AttName_starts # No intervening WS.
     
    476485
    477486        # End Tag Parsing
    478         EndTagEnds = bitutil.ScanThru(bitutil.ScanThru(tag_Callouts.EndTag_marks, tag_Callouts.EndTag_marks | lex.NameScan), lex.WS)
     487
     488        EndTagEnds = bitutil.ScanThru(tag_Callouts.EndTag_marks, tag_Callouts.EndTag_marks | lex.NameScan)
     489        if EndTagEnds & lex.WS:
     490                EndTagEnds = bitutil.ScanThru(EndTagEnds, lex.WS)
    479491        ParseError |= EndTagEnds & ~lex.RAngle
    480         tag_Callouts.error = ParseError
    481                
     492        if ParseError:
     493                error_tracker.NoteError("Tag parsing error found", (ParseError))
     494
     495
    482496        # Attribute value spans
    483497        tag_Callouts.AttVal_spans = tag_Callouts.AttVal_ends - tag_Callouts.AttVal_starts
    484                        
     498
    485499def Parse_refs(lex, scope1, ctCDPI_Callouts, ref_Callouts):
    486500        ref_Callouts.GenRef_starts = 0
     
    490504        ref_Callouts.HexRef_starts = 0
    491505        ref_Callouts.HexRef_ends = 0
    492         ref_Callouts.error = 0
     506        ref_error = 0
    493507
    494508        Ref1 = lex.RefStart &~ ctCDPI_Callouts.CtCDPI_mask
     
    501515                HexRef3 = NumRef3 & lex.x
    502516                ref_Callouts.DecRef_starts = NumRef3 &~ lex.x
    503                 ref_Callouts.HexRef_starts = bitutil.Advance(HexRef3) 
     517                ref_Callouts.HexRef_starts = bitutil.Advance(HexRef3)
    504518                ref_Callouts.GenRef_ends = bitutil.ScanThru(ref_Callouts.GenRef_starts, lex.NameScan)
    505519                ref_Callouts.DecRef_ends = bitutil.ScanThru(ref_Callouts.DecRef_starts, lex.Digit)
     
    512526                ref_ends = ref_Callouts.GenRef_ends | ref_Callouts.DecRef_ends | ref_Callouts.HexRef_ends
    513527                ref_error3 = ref_ends &~ lex.Semicolon
    514                 ref_Callouts.error = ref_error1 | ref_error2 | ref_error3
     528                ref_error = ref_error1 | ref_error2 | ref_error3
     529                if ref_error:
     530                        error_tracker.NoteError("Reference error found", (ref_error))
     531
     532
    515533
    516534def Validate_xml_names(ctCDPI_Callouts, ref_Callouts, tag_Callouts, lex, u8, xml_names, check_streams):
     
    530548        colon2_err = bitutil.ScanThru(local_part_start, lex.NameScan &~ lex.Colon) & lex.Colon
    531549        ncname_err = ncname_stream & lex.Colon
    532         xml_names.namespace_error = void_prefix_err | local_part_err | colon2_err | ncname_err
    533                        
     550        namespace_error = void_prefix_err | local_part_err | colon2_err | ncname_err
     551        if namespace_error:
     552                error_tracker.NoteError("error found", namespace_error)
     553
     554
    534555        check_streams.non_ascii_name_starts = name_start &~lex.ASCII_name_start
    535556        check_streams.non_ascii_names = (name_stream &~ name_start) & ~lex.ASCII_name_char & ~u8.suffix
    536    
     557
    537558def Do_check_streams(ctCDPI_Callouts, tag_Callouts, lex, u8, scope1, ref_Callouts, xml_names, check_streams):
    538559    # Ensure that no occurrence of ]]> occurs outside of markup.
    539560    CD_end_error = ctCDPI_Callouts.CD_end & ~(ctCDPI_Callouts.CtCDPI_mask | tag_Callouts.AttVal_spans)
    540            
     561
    541562    # Consolidate and check for errors
    542     check_streams.error_mask = lex.error & EOF_mask | u8.error | u8.FFFE_FFFF | ctCDPI_Callouts.error | tag_Callouts.error | CD_end_error | ref_Callouts.error | xml_names.namespace_error
    543 
    544     check_streams.tag_marks = tag_Callouts.EmptyTag_marks | tag_Callouts.LAngleFollow | tag_Callouts.AttName_starts
     563    if CD_end_error:
     564                error_tracker.NoteError("Error: ]]> in text", CD_end_error)
     565
     566
     567    check_streams.tag_marks = tag_Callouts.EmptyTag_marks | tag_Callouts.ElemName_starts | tag_Callouts.EndTag_marks | tag_Callouts.AttName_starts
    545568    check_streams.name_follows = tag_Callouts.ElemName_ends | tag_Callouts.AttName_ends
    546569    check_streams.att_refs = tag_Callouts.AttVal_spans & scope1.RefStart
     
    627650    #hash_data.Hash_value = basis_bits.bit_3 ^ basis_bits.bit_5 ^ basis_bits.bit_7
    628651
    629 #def main(basis_bits, lex, u8, scope1, ctCDPI_Callouts, masks, check_streams, tag_Callouts, ref_Callouts, xml_names):
    630652def Main(basis_bits, lex, u8, xml_char, scope1, ctCDPI_Callouts, ref_Callouts, tag_Callouts, masks, xml_names, check_streams, hash_data):
    631653       
Note: See TracChangeset for help on using the changeset viewer.