Ignore:
Timestamp:
Oct 29, 2010, 2:37:43 PM (9 years ago)
Author:
ksherdy
Message:

Add UTF8 validation. Add UTF8 character set definitions.

File:
1 edited

Legend:

Unmodified
Added
Removed
  • proto/JSON/json_prototype.py

    r675 r682  
     17  17  import bitutil
     18  18  import byteclass
         19  import u8u16
     19  20  import math
     20  21  import sys
     
     24  25
     25  26  # Globals
         27  #
     26  28  # Bitstream function defs input/output *only* bitstream type variables.
     27  29  # Global declarations allow debug blocks in bitstream defs. Do not shadow variables.
     
    155 157
    156 158          (bit, EOF_mask) = bitutil.transpose_streams(u8data)
    157              (lex) = byteclass.classify_bytes(bit)
        159          (u8, lex, ctrl) = byteclass.classify_bytes(bit)
    158 160          Errors = validate_number(lex,EOF_mask)
    159 161
     
    199 201
    200 202          (bit, EOF_mask) = bitutil.transpose_streams(u8data)
    201              (lex) = byteclass.classify_bytes(bit)
        203          (u8, lex, ctrl) = byteclass.classify_bytes(bit)
    202 204          (escape) = parse_escape(lex,EOF_mask)
    203 205
     
    251 253
    252 254          (bit, EOF_mask) = bitutil.transpose_streams(u8data)
    253              (lex) = byteclass.classify_bytes(bit)
        255          (u8, lex, ctrl) = byteclass.classify_bytes(bit)
    254 256          brackets = (lex.LSquareBracket | lex.RSquareBracket)
    255 257          (parity) = parallel_prefix_parity(brackets,lgth)
     
    262 264          return
    263 265
        266  def parse_json(u8data):
        267
        268          # Transpose to parallel bit streams and prepare an EOF mask.
        269          (bit, EOF_mask) = bitutil.transpose_streams(u8data)
        270
        271          # Classify bytes for UTF-8 processing, whitespace, control and JSON lexical analysis.
        272          (u8, control, lex) = byteclass.classify_bytes(bit)
        273
        274          # Validate UTF-8 multibyte sequences and determine the UTF-8 scope streams.
        275          u8 = u8u16.validate_utf8(u8)
        276
        277          return
        278
        279  def demo_parse_json(u8data):
        280
        281          global lgth
        282          lgth = len(u8data)
        283
        284          parse_json(u8data)
        285
        286          return
        287
    264 288  if __name__ == "__main__":
    265 289          import doctest
     
    273 297  #       demo_validate_number(u8data)
    274 298  #       demo_parse_escape(u8data)
    275              demo_parallel_prefix_parity(u8data)
        299  #       demo_parallel_prefix_parity(u8data)
        300  #       demo_parse_json(u8data)
Note: See TracChangeset for help on using the changeset viewer.