Ignore:
Timestamp:
Jun 26, 2010, 12:37:34 PM (9 years ago)
Author:
cameron
Message:

Prevalidation of XML name syntax

File:
1 edited

Legend:

Unmodified
Added
Removed
  • proto/parabix2/parabix2.py

    r411 r416  
    554554
    555555
     556#
     557# Fully validate ASCII-based names and identify non-ASCII
     558# positions within names.
     559
     560def prevalidate_names(lex, name_stream, nmtoken_stream):
     561        """Fully validate ASCII-based names and identify non-ASCII positions within names.
     562
     563        >>> demo_prevalidate_names("<good -bad='hyphen'/><_OK/><:funny:butOK/><1problem a='b' d423='x'>")
     564        input data: <good -bad='hyphen'/><_OK/><:funny:butOK/><1problem a='b' d423='x'>
     565        names     : _1111_1111____________111___111111111111___11111111_1_____1111_____
     566        name_check: ______1____________________________________1_______________________
     567"""
     568        name_check = name_stream &~ bitutil.Advance(name_stream) & ~lex.ASCII_name_start
     569        name_check |= (name_stream | nmtoken_stream) & ~lex.ASCII_name_char
     570        return name_check
     571
     572def demo_prevalidate_names(u8data):
     573        lgth = len(u8data)
     574        (bit, EOF_mask) = bitutil.transpose_streams(u8data)
     575        (u8, control, lex) = byteclass.classify_bytes(bit)
     576        lex = add_multiliterals(lex)
     577        markup1 = parse_CtCDPI(lex, EOF_mask)
     578        callouts = parse_tags(lex, markup1.CtCDPI_mask, EOF_mask)
     579        name_stream = callouts.ElemNames | callouts.AttNames
     580        name_check = prevalidate_names(lex, name_stream, 0)
     581        bitutil.print_aligned_u8_byte_streams([('input data', u8data),
     582                              ('names', bitutil.bitstream2string(name_stream, lgth)),
     583                              ('name_check', bitutil.bitstream2string(name_check, lgth))])
    556584
    557585def parabix_parse(u8data):
Note: See TracChangeset for help on using the changeset viewer.