Changeset 1998 for proto/parabix2


Ignore:
Timestamp:
Apr 6, 2012, 12:52:08 PM (7 years ago)
Author:
cameron
Message:

Fixes for parsing system/public IDs.

File:
1 edited

Legend:

Unmodified
Added
Removed
  • proto/parabix2/parabix2.py

    r1987 r1998  
    377 377                                error_tracker.NoteError("Error in comment syntax", Ct_error)
    378 378                else:
     379                        #print "DTD"
    379 380                        DTD_Cursor = ParseDTD(CD_Ct_Cursor, DTD_out)
    380 381                        CtCDPI_ends |= DTD_Cursor
     
    498 499 #  Generate callout.{pubid_start, pubid_end, sysid_start, sysid_end, end_marker}
    499 500 def ParseExternalID(input_marker, require_system_id, callout):
    500         if pablo.match('PUBLIC', input_marker):
     501        public_marker = pablo.match('PUBLIC', input_marker)
     502        system_marker = pablo.match('SYSTEM', input_marker)
     503        no_marker = input_marker &~ (public_marker | system_marker)
     504        if no_marker:
     505                error_tracker.NoteError('ExpectedSystemOrPublicId', no_marker)
     506        if public_marker:
    501 507                marker = pablo.AdvancebyPos(input_marker, 6)
    502 508                if marker &~ lex.WS:
     
    522 528                                error_tracker.NoteError('UnterminatedDOCTYPE', marker)
    523 529                        callout.sysid_end = marker
    524                 elif require_system_id:
    525                         error_tracker.NoteError('ExpectedQuotedString', marker)
    526         # Note the following is problematic for current Pablo, because
    527         # else branches are assumed not to have any carries.   (That is,
    528         # Pablo ifs are based on optimization.)
    529         # If we just use "if" here it will work for valid input, but
    530         # will accept some invalid cases.
    531         elif pablo.match('SYSTEM', input_marker):
    532                 # If we just processed a 'PUBLIC' id declaration, this is an error.
    533                 # But the following won't work, because the callout might be in a
    534                 # block prior to the match with SYSTEM.
    535                 #if callout.pubid_start:
    536                 #       NoteXercesXMLErr(UnterminatedDOCTYPE, marker)
    537                 # Else accept the 'SYSTEM' and continue.
     530                else:
     531                        if require_system_id:
     532                                error_tracker.NoteError('ExpectedQuotedString', marker)
     533        if system_marker:
    538 534                marker = pablo.AdvancebyPos(input_marker, 6)
    539 535                if marker &~ lex.WS:
     
    564 560                error_tracker.NoteError('NoRootElemInDOCTYPE', DTD_out.root_name_start)
    565 561        marker = pablo.ScanThru(DTD_out.root_name_follow, lex.WS)
    566         require_system_id = 1
    567         ParseExternalID(marker, require_system_id, id_out)
    568         DTD_out.pubid_start = id_out.pubid_start
    569         DTD_out.pubid_end = id_out.pubid_end
    570         DTD_out.sysid_start = id_out.sysid_start
    571         DTD_out.sysid_end = id_out.sysid_end
    572         marker = id_out.end_marker
    573         marker = pablo.ScanThru(marker, id_out.end_marker | lex.WS)
     562        if marker & lex.NameScan:
     563                require_system_id = 1
     564                ParseExternalID(marker, require_system_id, id_out)
     565                DTD_out.pubid_start = id_out.pubid_start
     566                DTD_out.pubid_end = id_out.pubid_end
     567                DTD_out.sysid_start = id_out.sysid_start
     568                DTD_out.sysid_end = id_out.sysid_end
     569                marker = id_out.end_marker
     570                marker = pablo.ScanThru(marker, id_out.end_marker | lex.WS)
    574 571        # Now must be at "[" for internal subset or ">", if none.
    575 572        if pablo.match('[', marker):
     
    609 606                                                error_tracker.NoteError('IllegalSequenceInComment', marker)
    610 607                                if pablo.match('!ENTITY', marker):
     608                                        #print "ENTITY"
    611 609                                        is_PE_entity = 0
    612 610                                        marker = pablo.AdvancebyPos(marker, 7)
     
    908 906
    909 907 if __name__ == "__main__":
    910        
     908        #print "Starting ..."
    911 909        if len(sys.argv) > 1:
    912 910                u8data = pablo.readfile(sys.argv[1])
Note: See TracChangeset for help on using the changeset viewer.