Ignore:
Timestamp:
Jul 28, 2011, 3:01:53 PM (8 years ago)
Author:
vla24
Message:

Updated symbol table implementation.

File:
1 edited

Legend:

Unmodified
Added
Removed
  • proto/SymbolTable/parabix2_symtab_pbs_log.py

    r1228 r1232  
    4545  xF4_scope = 0
    4646  xEF_scope = 0
    47  
    48   FFFE_FFFF = 0
    49   error = 0
    5047
    5148class Lex ():
     
    5653        CRLF = 0
    5754        RefStart = 0
    58         Semicolon = 0 
     55        Semicolon = 0
    5956        Colon = 0
    6057        LAngle = 0
     
    7774        Hex = 0
    7875        WS = 0
    79         error = 0
    8076
    8177class Scope1 ():
     
    9793        PI_ends = 0
    9894        CtCDPI_mask = 0
    99         error = 0
    10095
    10196class Ref_Callouts():
     
    106101        HexRef_starts = 0
    107102        HexRef_ends = 0
    108         error = 0
    109103
    110104class Hash_data():
     
    127121        EmptyTag_marks = 0
    128122        EndTag_marks = 0
    129         LAngleFollow = 0
    130         error = 0
    131 
    132 class Basis_bits():     
     123
     124class Basis_bits():
    133125        bit_0 = 0
    134126        bit_1 = 0
     
    139131        bit_6 = 0
    140132        bit_7 = 0
    141        
     133
    142134class Check_streams():
    143135        misc_mask = 0
    144136        non_ascii_name_starts = 0
    145137        non_ascii_names = 0
    146         tag_marks = 0
    147         name_follows = 0
    148         att_refs = 0
    149         error_mask = 0
     138        tag_marks = 0
     139        name_follows = 0
     140        att_refs = 0
    150141
    151142class Xml_names():
    152143        namespace_error = 0
    153144
    154 def Classify_bytes_Validate_utf8(basis_bits, lex, u8): 
     145def Classify_bytes_Validate_utf8(basis_bits, lex, u8):
    155146        temp1 = (basis_bits.bit_0 | basis_bits.bit_1);
    156147        temp2 = (basis_bits.bit_2 &~ basis_bits.bit_3);
     
    245236        temp65 = (temp64 & temp60);
    246237        lex.Hex = (temp62 | temp65);
    247         lex.error = x00_x1F &~ lex.WS
    248        
     238        lex_error = x00_x1F &~ lex.WS
     239        if lex_error & EOF_mask:
     240                error_tracker.NoteError("Error: illegal character", lex_error)
     241
     242
    249243        ### Validate_utf8(basis_bits, u8):
    250244        u8.unibyte = (~basis_bits.bit_0);
    251245        u8.suffix = 0
    252         u8.error = 0
    253         u8.FFFE_FFFF = 0
     246        u8_error = 0
     247        u8_FFFE_FFFF = 0
    254248        u8anyscope = 0 #local
    255249        if basis_bits.bit_0:
     
    266260                temp71 = (u8.prefix4 & temp70);
    267261                u8.badprefix = (temp68 | temp71);
    268                 u8.error = u8.badprefix
     262                u8_error = u8.badprefix
    269263                u8.scope22 = bitutil.Advance(u8.prefix2)
    270264                u8anyscope = u8.scope22
     
    296290                        u8lastscope = u8.scope22 | u8.scope33 | u8.scope44
    297291                        u8anyscope = u8lastscope | u8.scope32 | u8.scope42 | u8.scope43
    298                
     292
    299293                        u8error1 = u8.xE0_scope & u8.x80_x9F
    300294                        u8error2 = u8.xED_scope & u8.xA0_xBF
    301295                        u8error3 = u8.xF0_scope & u8.x80_x8F
    302296                        u8error4 = u8.xF4_scope & u8.x90_xBF
    303        
    304                         u8.error |= u8error1 | u8error2 | u8error3 | u8error4
     297
     298                        u8_error |= u8error1 | u8error2 | u8error3 | u8error4
    305299
    306300                        EF_BF_pending = bitutil.Advance(u8.xEF_scope & u8.xBF)
    307301
    308                         u8.FFFE_FFFF = (EF_BF_pending & (u8.xBE | u8.xBF))
    309         u8mismatch = u8anyscope ^ u8.suffix
    310         u8.error |= u8mismatch
    311        
     302                        u8_FFFE_FFFF = (EF_BF_pending & (u8.xBE | u8.xBF))
     303                u8mismatch = u8anyscope ^ u8.suffix
     304                u8_error |= u8mismatch | u8_FFFE_FFFF
     305                if u8_error:
     306                        error_tracker.NoteError("UTF-8 error found", (u8_error))
     307
     308
    312309def Add_scope_streams(lex, scope1):
    313310        #scope1.LAngle = bitutil.Advance(lex.LAngle)
     
    317314        w = lex.Hyphen | lex.QMark
    318315        v1 = bitutil.Advance(v)
    319         w1 = bitutil.Advance(w)
     316        w1 = bitutil.Advance(w)
    320317        scope1.LAngle = v1 &~ w1
    321318        scope1.Hyphen = v1 & w1
    322319        scope1.QMark = w1 &~ v1
     320        scope1.RefStart = 0 # default
    323321
    324322def Parse_CtCDPI(ctCDPI_Callouts, lex, scope1, check_streams):
     
    333331        ctCDPI_Callouts.PI_ends = 0
    334332        ctCDPI_Callouts.CtCDPI_mask = 0
    335         ctCDPI_Callouts.error = 0
     333        ctCDPI_error = 0
    336334        CtCDPI_starts = 0
    337335        Ct_errors = 0
     
    356354                CD_Ct_Cursor = bitutil.Advance(CtCDPI_Cursor & ~PI_Cursor)
    357355                CD_Cursor = CD_Ct_Cursor & lex.LBracket
    358                 Ct_Cursor = CD_Ct_Cursor & lex.Hyphen 
     356                Ct_Cursor = CD_Ct_Cursor & lex.Hyphen
    359357                ctCDPI_Callouts.PI_starts |= PI_Cursor
    360358                ctCDPI_Callouts.CD_starts |= CD_Cursor
    361359                ctCDPI_Callouts.Ct_starts |= Ct_Cursor
    362                 Ct_Cursor = bitutil.Advance(Ct_Cursor) 
    363                 Ct_errors |= Ct_Cursor & ~ lex.Hyphen 
    364                 # Advance twice past <!--, so that we don't treat <!--- 
    365                 # as being a terminated comment.
     360                Ct_Cursor = bitutil.Advance(Ct_Cursor)
     361                Ct_errors |= Ct_Cursor & ~ lex.Hyphen
     362                # Advance twice past <!--, so that we don't treat <!---
     363                # as being a terminated comment.
    366364                Ct_Cursor = bitutil.Advance(bitutil.Advance(Ct_Cursor))
    367365                PI_Cursor = bitutil.Advance(PI_Cursor)
     
    377375                CtCDPI_Cursor = PI_Cursor | CD_Cursor | Ct_Cursor
    378376                CtCDPI_Cursor = bitutil.ScanTo(CtCDPI_Cursor, CtCDPI_start)
    379        
    380                 ctCDPI_Callouts.CtCDPI_mask = bitutil.Advance(ctCDPI_Callouts.CD_ends | ctCDPI_Callouts.Ct_ends | ctCDPI_Callouts.PI_ends) - CtCDPI_starts             
     377
     378                ctCDPI_Callouts.CtCDPI_mask = bitutil.Advance(ctCDPI_Callouts.CD_ends | ctCDPI_Callouts.Ct_ends | ctCDPI_Callouts.PI_ends) - CtCDPI_starts
    381379                #ctCDPI_Callouts.error = Ct_ends & ~lex.RAngle | Ct_starts & ~ lex.Hyphen
    382                 ctCDPI_Callouts.error = Ct_errors | ctCDPI_Callouts.Ct_ends & ~lex.RAngle
    383                 ctCDPI_Callouts.error |= bitutil.Advance(ctCDPI_Callouts.PI_name_ends & ~ lex.WS) & ~ PI_end
    384                 ctCDPI_Callouts.error |= ctCDPI_Callouts.PI_name_starts & ctCDPI_Callouts.PI_name_ends
     380                ctCDPI_error = Ct_errors | ctCDPI_Callouts.Ct_ends & ~lex.RAngle
     381                ctCDPI_error |= bitutil.Advance(ctCDPI_Callouts.PI_name_ends & ~ lex.WS) & ~ PI_end
     382                ctCDPI_error |= ctCDPI_Callouts.PI_name_starts & ctCDPI_Callouts.PI_name_ends
    385383                # If any of the Comment, CDATA or PI markups are unterminated, it is an error.
    386                 ctCDPI_Callouts.error |= ctCDPI_Callouts.CtCDPI_mask &~ EOF_mask
    387                
     384                ctCDPI_error |= ctCDPI_Callouts.CtCDPI_mask &~ EOF_mask
     385
     386        if ctCDPI_error:
     387                error_tracker.NoteError("Error in comment, CDATA or processing instruction syntax", ctCDPI_error)
     388
    388389        check_streams.misc_mask = (lex.WS | lex.LAngle | (bitutil.Advance(ctCDPI_Callouts.Ct_ends | ctCDPI_Callouts.PI_ends) - (ctCDPI_Callouts.Ct_starts | ctCDPI_Callouts.PI_starts)) | CtCDPI_starts) & EOF_mask
    389390
    390391def Parse_tags(lex, scope1, ctCDPI_Callouts, tag_Callouts):
    391392
    392        
     393
    393394        # Delimiters for scans.
    394395        DQuoteDelim = lex.DQuote | lex.LAngle
    395396        SQuoteDelim = lex.SQuote | lex.LAngle
    396397        AttListDelim = lex.Slash | lex.RAngle
    397        
     398
    398399        # Start the parallel parsing by inspecting the character
    399400        # after the opening "<" of a tag.
    400         tag_Callouts.LAngleFollow = scope1.LAngle &~ ctCDPI_Callouts.CtCDPI_mask
    401         tag_Callouts.ElemName_starts = tag_Callouts.LAngleFollow & ~lex.Slash
    402         tag_Callouts.EndTag_marks = tag_Callouts.LAngleFollow & lex.Slash
    403        
     401        LAngleFollow = scope1.LAngle &~ ctCDPI_Callouts.CtCDPI_mask
     402        tag_Callouts.ElemName_starts = LAngleFollow & ~lex.Slash
     403        tag_Callouts.EndTag_marks = LAngleFollow & lex.Slash
     404
    404405        # Start Tag/Empty Element Tag Parsing
    405406
     
    409410        # Mark any occurrences of null names as errors.
    410411        ParseError = tag_Callouts.ElemName_starts & tag_Callouts.ElemName_ends
    411        
     412
    412413        # Initialize the accumulators for attribute name and value positions.
    413         tag_Callouts.AttName_starts = 0 
     414        tag_Callouts.AttName_starts = 0
    414415        tag_Callouts.AttName_ends = 0
    415416        EqToCheck = 0
     
    435436                tag_Callouts.AttName_ends |= AttNameFollow
    436437                # Scan through WS to the expected '=' delimiter.
    437                 EqExpected = bitutil.ScanThru(AttNameFollow, lex.WS)
     438                # EqExpected = bitutil.ScanThru(AttNameFollow, lex.WS)
     439                # But use if test to optimize.
     440                if AttNameFollow & lex.WS:
     441                        EqExpected = bitutil.ScanThru(AttNameFollow, lex.WS)
     442                else: EqExpected = AttNameFollow
    438443                EqToCheck |= EqExpected
    439444                AttValPos = bitutil.ScanThru(EqExpected, EqExpected | lex.WS)
     
    447452                AttValFollow = bitutil.Advance(AttValEnd)
    448453                tag_Callouts.AttVal_ends |= AttValFollow
     454                #  AfterWS = bitutil.ScanThru(AttValFollow, lex.WS)
     455                if AttValFollow & lex.WS:
     456                        AfterWS = bitutil.ScanThru(AttValFollow, lex.WS)
     457                else: AfterWS = AttValFollow
    449458                AfterWS = bitutil.ScanThru(AttValFollow, lex.WS)
    450459                AttListEnd |= AfterWS & AttListDelim
     
    455464        # Mark any "/" characters found as the ends of empty element tags.
    456465        tag_Callouts.EmptyTag_marks = bitutil.Advance(AttListEnd & lex.Slash)
    457        
     466
    458467        # Check for errors.
    459468        ParseError |= tag_Callouts.AttVal_ends & tag_Callouts.AttName_starts # No intervening WS.
     
    465474
    466475        # End Tag Parsing
    467         EndTagEnds = bitutil.ScanThru(bitutil.ScanThru(tag_Callouts.EndTag_marks, tag_Callouts.EndTag_marks | lex.NameScan), lex.WS)
     476
     477        EndTagEnds = bitutil.ScanThru(tag_Callouts.EndTag_marks, tag_Callouts.EndTag_marks | lex.NameScan)
     478        if EndTagEnds & lex.WS:
     479                EndTagEnds = bitutil.ScanThru(EndTagEnds, lex.WS)
    468480        ParseError |= EndTagEnds & ~lex.RAngle
    469         tag_Callouts.error = ParseError
    470                
     481        if ParseError:
     482                error_tracker.NoteError("Tag parsing error found", (ParseError))
     483
     484
    471485        # Attribute value spans
    472486        tag_Callouts.AttVal_spans = tag_Callouts.AttVal_ends - tag_Callouts.AttVal_starts
    473                        
     487
    474488def Parse_refs(lex, scope1, ctCDPI_Callouts, ref_Callouts):
    475489        ref_Callouts.GenRef_starts = 0
     
    479493        ref_Callouts.HexRef_starts = 0
    480494        ref_Callouts.HexRef_ends = 0
    481         ref_Callouts.error = 0
     495        ref_error = 0
    482496
    483497        Ref1 = lex.RefStart &~ ctCDPI_Callouts.CtCDPI_mask
     
    490504                HexRef3 = NumRef3 & lex.x
    491505                ref_Callouts.DecRef_starts = NumRef3 &~ lex.x
    492                 ref_Callouts.HexRef_starts = bitutil.Advance(HexRef3) 
     506                ref_Callouts.HexRef_starts = bitutil.Advance(HexRef3)
    493507                ref_Callouts.GenRef_ends = bitutil.ScanThru(ref_Callouts.GenRef_starts, lex.NameScan)
    494508                ref_Callouts.DecRef_ends = bitutil.ScanThru(ref_Callouts.DecRef_starts, lex.Digit)
     
    501515                ref_ends = ref_Callouts.GenRef_ends | ref_Callouts.DecRef_ends | ref_Callouts.HexRef_ends
    502516                ref_error3 = ref_ends &~ lex.Semicolon
    503                 ref_Callouts.error = ref_error1 | ref_error2 | ref_error3
     517                ref_error = ref_error1 | ref_error2 | ref_error3
     518                if ref_error:
     519                        error_tracker.NoteError("Reference error found", (ref_error))
     520
     521
    504522
    505523def Validate_xml_names(ctCDPI_Callouts, ref_Callouts, tag_Callouts, lex, u8, xml_names, check_streams):
     
    519537        colon2_err = bitutil.ScanThru(local_part_start, lex.NameScan &~ lex.Colon) & lex.Colon
    520538        ncname_err = ncname_stream & lex.Colon
    521         xml_names.namespace_error = void_prefix_err | local_part_err | colon2_err | ncname_err
    522                        
     539        namespace_error = void_prefix_err | local_part_err | colon2_err | ncname_err
     540        if namespace_error:
     541                error_tracker.NoteError("error found", namespace_error)
     542
     543
    523544        check_streams.non_ascii_name_starts = name_start &~lex.ASCII_name_start
    524545        check_streams.non_ascii_names = (name_stream &~ name_start) & ~lex.ASCII_name_char & ~u8.suffix
    525    
     546
    526547def Do_check_streams(ctCDPI_Callouts, tag_Callouts, lex, u8, scope1, ref_Callouts, xml_names, check_streams):
    527548    # Ensure that no occurrence of ]]> occurs outside of markup.
    528549    CD_end_error = ctCDPI_Callouts.CD_end & ~(ctCDPI_Callouts.CtCDPI_mask | tag_Callouts.AttVal_spans)
    529            
     550
    530551    # Consolidate and check for errors
    531     check_streams.error_mask = lex.error & EOF_mask | u8.error | u8.FFFE_FFFF | ctCDPI_Callouts.error | tag_Callouts.error | CD_end_error | ref_Callouts.error | xml_names.namespace_error
    532 
    533     check_streams.tag_marks = tag_Callouts.EmptyTag_marks | tag_Callouts.LAngleFollow | tag_Callouts.AttName_starts
     552    if CD_end_error:
     553                error_tracker.NoteError("Error: ]]> in text", CD_end_error)
     554
     555
     556    check_streams.tag_marks = tag_Callouts.EmptyTag_marks | tag_Callouts.ElemName_starts | tag_Callouts.EndTag_marks | tag_Callouts.AttName_starts
    534557    check_streams.name_follows = tag_Callouts.ElemName_ends | tag_Callouts.AttName_ends
    535558    check_streams.att_refs = tag_Callouts.AttVal_spans & scope1.RefStart
Note: See TracChangeset for help on using the changeset viewer.