Ignore:
Timestamp:
Nov 16, 2010, 7:42:01 PM (9 years ago)
Author:
ksherdy
Message:

Demo logic to validate JSON atoms.

File:
1 edited

Legend:

Unmodified
Added
Removed
  • proto/JSON/json_prototype.py

    r704 r713  
    2121import sys
    2222
    23 debug = False
     23debug = True
    2424filename = "json_prototype.py"
    2525
    2626# Globals
    2727#
    28 # Bitstream function defs input/output *only* bitstream type variables.
    29 # Global declarations allow debug blocks in bitstream defs. Do not shadow variables.
     28# Bitstream function definitions input/output *only* bitstream type variables.
     29#
     30# Global declarations allow debug blocks in bitstream processing definitions.
     31#
     32# Do not shadow the global variables 'u8data' or 'lgth' with local variable declarations.
     33
    3034u8data = ""
    3135lgth = 0
     
    4448        '\' characters are either escaped and unmarked or the following character in an odd length run is marked.
    4549        """
    46 
    4750        odd = simd_const_4('a',EOF_mask)
    4851        even = simd_const_4('5',EOF_mask)
     
    6568        global lgth
    6669        lgth = len(u8data)
    67 
     70       
    6871        (bit, EOF_mask) = bitutil.transpose_streams(u8data)
    6972        (u8, Lex, Ctrl) = byteclass.classify_bytes(bit)
     
    7679        return
    7780
    78 def parallel_prefix_parity(strm,lgth):
     81def parallel_prefix_parity(strm):
    7982        r"""
    8083        Translate to library function.
     
    99102        bitstream[k] = 0 --> even parity
    100103        """
     104        global lgth
     105#       lgth = len(u8data)
     106
    101107        t1 = strm
    102108        for i in range(0,int(math.ceil(math.log(lgth,2)))):
     
    113119        EOF_Mask  : 11111111111111111111111111111111_
    114120        <BLANKLINE>
    115         """
     121        """     
    116122        global lgth
    117123        lgth = len(u8data)
    118        
     124
    119125        (bit, EOF_mask) = bitutil.transpose_streams(u8data)
    120126        (u8, Lex, Ctrl) = byteclass.classify_bytes(bit)
    121         ParityMask = parallel_prefix_parity(Lex.DQuote,lgth)
     127        ParityMask = parallel_prefix_parity(Lex.DQuote)
    122128       
    123129        bitutil.print_aligned_streams([('Input Data', u8data),
     
    127133        return
    128134
    129 def value_starts(Lex,StringMask,EscapeChars,lgth):
     135
     136#TODO - Review this function.
     137def atom_starts(Lex,StringMask,EscapeChars):
    130138        r"""
    131139        This function returns multi-cursor start positions for each JSON value type.
    132         """
    133         ValueStartsMask = (StringMask | EscapeChars)
    134        
    135         ObjectStarts = Lex.LCurlyBrace &~ ValueStartsMask
    136         ArrayStarts = Lex.LSquareBracket &~ ValueStartsMask
    137        
    138         Comma = Lex.Comma &~ ValueStartsMask
    139         Colon = Lex.Colon &~ ValueStartsMask
     140       
     141        Define JSON atoms as 'String', 'Number', 'true', 'false', and 'null' types.
     142        """
     143        global lgth
     144        StringEscapeCharMask = (StringMask | EscapeChars)               
     145       
     146        ObjectStarts = Lex.LCurlyBrace &~ StringEscapeCharMask
     147        ArrayStarts = Lex.LSquareBracket &~ StringEscapeCharMask
     148               
     149        Comma = Lex.Comma &~ StringEscapeCharMask
     150        Colon = Lex.Colon &~ StringEscapeCharMask
    140151       
    141152        NonStringSyncPoint = (ArrayStarts | Comma | Colon)
     
    143154       
    144155        StringSyncPointFollows = bitutil.Advance(StringSyncPoint)
    145         StringStarts = (StringSyncPointFollows | bitutil.ScanThru(StringSyncPointFollows, Lex.WS)) & (Lex.DQuote &~ ValueStartsMask)
     156        StringStarts = (StringSyncPointFollows | bitutil.ScanThru(StringSyncPointFollows, Lex.WS)) & (Lex.DQuote &~ StringEscapeCharMask)
    146157
    147158        NonStringSyncPointFollows = bitutil.Advance(NonStringSyncPoint)
    148159        ValueCursors = (StringSyncPointFollows | bitutil.ScanThru(StringSyncPointFollows,Lex.WS))
    149160       
    150         NumberStarts = ValueCursors & ((Lex.Minus|Lex.Digit0_9) &~ ValueStartsMask)
    151         TrueStarts = ValueCursors & (Lex.t &~ ValueStartsMask)
    152         FalseStarts = ValueCursors & (Lex.f &~ ValueStartsMask)
    153         NullStarts = ValueCursors & (Lex.n &~ ValueStartsMask)
    154        
     161        NumberStarts = ValueCursors & ((Lex.Minus|Lex.Digit0_9) &~ StringEscapeCharMask)
     162        TrueStarts = ValueCursors & (Lex.t &~ StringEscapeCharMask)
     163        FalseStarts = ValueCursors & (Lex.f &~ StringEscapeCharMask)
     164        NullStarts = ValueCursors & (Lex.n &~ StringEscapeCharMask)
     165        ValueStarts = ValueCursors & (Lex.ValueStartsSet &~ StringEscapeCharMask)
     166
    155167        if debug:
    156168                bitutil.print_aligned_streams([('Input Data', u8data),
    157                               ('ValueStartsMask', bitutil.bitstream2string(ValueStartsMask, lgth)),
    158                               ('Comma', bitutil.bitstream2string(Comma, lgth)),
    159                               ('Colon', bitutil.bitstream2string(Colon, lgth)),
    160                               ('Lex.DQuote &~ ValueStartsMask', bitutil.bitstream2string((Lex.DQuote &~ ValueStartsMask), lgth)),
    161                               ('NonStringSyncPoint', bitutil.bitstream2string(NonStringSyncPoint, lgth)),
    162                               ('StringSyncPoint', bitutil.bitstream2string(StringSyncPoint, lgth)),
    163                               ('StringSyncPointFollows', bitutil.bitstream2string(StringSyncPointFollows, lgth)),
    164                               ('StringStarts', bitutil.bitstream2string(StringStarts, lgth)),
    165                               ('NonStringSyncPointFollows', bitutil.bitstream2string(NonStringSyncPointFollows, lgth)),
    166                               ('ValueCursors', bitutil.bitstream2string(ValueCursors, lgth))])
    167        
    168         return (ObjectStarts, ArrayStarts, StringStarts, NumberStarts, TrueStarts, FalseStarts, NullStarts)
    169 
    170 
    171 def demo_value_starts(u8data):
    172         global lgth
     169                              ('StringEscapeCharMask', bitutil.bitstream2string(StringEscapeCharMask, lgth)),
     170                              ('ObjectStarts', bitutil.bitstream2string(ObjectStarts, lgth)),
     171                              ('ArrayStarts', bitutil.bitstream2string(ArrayStarts, lgth))])
     172       
     173        return (StringStarts, NumberStarts, TrueStarts, FalseStarts, NullStarts, ValueStarts)
     174
     175
     176def demo_atom_starts(u8data):
     177        global lgth     
    173178        lgth = len(u8data)
    174179               
     
    180185        UnescapedDQuotes = (Lex.DQuote &~ EscapeChars)
    181186       
    182         ParityMask = parallel_prefix_parity(UnescapedDQuotes, lgth) & EOF_mask # TODO - Solve EOF_mask problem
     187        ParityMask = parallel_prefix_parity(UnescapedDQuotes) & EOF_mask # TODO - Solve EOF_mask problem
    183188        StringMask = ParityMask & bitutil.Advance(ParityMask)   
    184189       
    185         (ObjectStarts, ArrayStarts, StringStarts, NumberStarts,TrueStarts, FalseStarts, NullStarts) = value_starts(Lex, StringMask,EscapeChars,lgth)
     190        (StringStarts, NumberStarts,TrueStarts, FalseStarts, NullStarts, ValueStarts) = atom_starts(Lex, StringMask, EscapeChars)
    186191        bitutil.print_aligned_streams([('Input Data', u8data),
    187                               ('ObjectStarts', bitutil.bitstream2string(ObjectStarts, lgth)),
    188                               ('ArrayStarts', bitutil.bitstream2string(ArrayStarts, lgth)),
    189192                              ('StringStarts', bitutil.bitstream2string(StringStarts, lgth)),
    190193                              ('NumberStarts', bitutil.bitstream2string(NumberStarts, lgth)),
     
    232235        Scope4 = bitutil.Advance(Scope3)
    233236       
    234        
    235237        Errors |= Scope1 &~ Lex.a
    236238        Errors |= Scope2 &~ Lex.l
     
    238240        Errors |= Scope4 &~ Lex.e
    239241
    240         Follows = bitutil.Advance(Scope3)
     242        Follows = bitutil.Advance(Scope4)
    241243        Spans = Follows - Starts
    242244       
     
    281283        zero = %x30                ; 0 
    282284        """     
     285        global lgth
     286       
    283287        Errors = 0
    284288        M0 = Starts                                     # Initialize marker stream     
     
    313317        M11 = M7a | M10b                                        # Join
    314318
    315         ValueFollowSetErrors = M11 &~ (Lex.ValueFollowSet | ~EOF_mask)
     319        ValueFollowSetErrors = M11 &~ (Lex.ValueFollowsSet | Lex.WS)
     320        ValueFollowSetErrors |= M11 &~ EOF_mask
    316321       
    317322        Errors = E1 | E5b | E9b | ValueFollowSetErrors # E7a | E10b
     
    394399        <BLANKLINE>
    395400        """ 
    396        
    397401        global lgth
    398402        lgth = len(u8data)
    399 
     403       
    400404        (bit, EOF_mask) = bitutil.transpose_streams(u8data)
    401405        (u8, Lex, Ctrl) = byteclass.classify_bytes(bit)
     
    404408        UnescapedDQuotes = (Lex.DQuote &~ EscapeChars)
    405409       
    406         ParityMask = parallel_prefix_parity(UnescapedDQuotes, lgth) & EOF_mask
     410        ParityMask = parallel_prefix_parity(UnescapedDQuotes) & EOF_mask
    407411        StringMask = ParityMask & bitutil.Advance(ParityMask)   
    408412       
    409         (ObjectStarts, ArrayStarts, StringStarts, NumberStarts, TrueStarts, FalseStarts, NullStarts) = value_starts(Lex, StringMask, EscapeChars,lgth
     413        (StringStarts, NumberStarts, TrueStarts, FalseStarts, NullStarts, ValueStarts) = atom_starts(Lex, StringMask, EscapeChars)     
    410414       
    411415        (Errors, NumberSpans) = validate_number(NumberStarts, Lex, EOF_mask)
     
    423427                              ('EOF_Mask', bitutil.bitstream2string(EOF_mask, lgth+1))])
    424428
    425 def validate_string(StringStarts, Lex,Ctrl,StringMask,EscapeChars,UnescapedDQuotes, lgth):
     429def validate_string(StringStarts, Lex,Ctrl,StringMask,EscapeChars,UnescapedDQuotes):
    426430        r"""
    427431        RFC 4627 - JavaScript Object Notation (JSON) 
     
    448452        (2) validation of unescaped characters.
    449453        """
     454        global lgth
    450455       
    451456        # (1) Validate escape characters
     
    511516        global lgth
    512517        lgth = len(u8data)
    513 
     518       
    514519        (bit, EOF_mask) = bitutil.transpose_streams(u8data)
    515520        (u8, Lex, Ctrl) = byteclass.classify_bytes(bit)
    516        
     521
    517522        # Construct string interiors mask (1),(2),(3)
    518523        # (1) Mark all escaped characters
    519524        EscapeChars = parse_escape(Lex, EOF_mask)
    520        
     525
    521526        # (2) Mark all unescaped "
    522527        UnescapedDQuotes = (Lex.DQuote &~ EscapeChars)
    523        
     528
    524529        # (3) Construct string interiors mask
    525         ParityMask = parallel_prefix_parity(UnescapedDQuotes, lgth) & EOF_mask # TODO - Solve parity EOF_mask problem
     530        ParityMask = parallel_prefix_parity(UnescapedDQuotes) & EOF_mask # TODO - Solve parity EOF_mask problem
    526531        StringMask = ParityMask & bitutil.Advance(ParityMask)
    527                                
    528         (ObjectStarts, ArrayStarts, StringStarts, NumberStarts, TrueStarts, FalseStarts, NullStarts) = value_starts(Lex, StringMask, EscapeChars,lgth)                         
    529                                
    530         (Errors, StringSpans) = validate_string(StringStarts, Lex,Ctrl,StringMask,EscapeChars,UnescapedDQuotes,lgth)
     532
     533        (StringStarts, NumberStarts, TrueStarts, FalseStarts, NullStarts, ValueStarts) = atom_starts(Lex, StringMask, EscapeChars)                             
     534       
     535        (Errors, StringSpans) = validate_string(StringStarts, Lex,Ctrl,StringMask,EscapeChars,UnescapedDQuotes)
    531536       
    532537        bitutil.print_aligned_streams([('Input Data', u8data),
     
    542547        return
    543548
    544 def validate_json(u8data,lgth):
     549def validate_json_atoms(u8data):
    545550               
    546551        Errors = 0       
     
    552557        EscapeChars = parse_escape(Lex, EOF_mask)
    553558        UnescapedDQuotes = (Lex.DQuote &~ EscapeChars)
    554         ParityMask = parallel_prefix_parity(UnescapedDQuotes, lgth) & EOF_mask # TODO - Solve parity EOF_mask problem
     559        ParityMask = parallel_prefix_parity(UnescapedDQuotes) & EOF_mask # TODO - Solve parity EOF_mask problem
    555560        StringMask = ParityMask & bitutil.Advance(ParityMask)   
    556561       
    557         (ObjectStarts, ArrayStarts, StringStarts, NumberStarts, TrueStarts, FalseStarts, NullStarts) = value_starts(Lex, StringMask, EscapeChars,lgth)
    558        
    559         (StringErrors, StringSpans) = validate_string(StringStarts, Lex, Ctrl, StringMask, EscapeChars, UnescapedDQuotes, lgth)
     562        (StringStarts, NumberStarts, TrueStarts, FalseStarts, NullStarts, ValueStarts) = atom_starts(Lex, StringMask, EscapeChars)
     563       
     564        (StringErrors, StringSpans) = validate_string(StringStarts, Lex, Ctrl, StringMask, EscapeChars, UnescapedDQuotes)
    560565       
    561566        (NumberErrors, NumberSpans) = validate_number(NumberStarts, Lex, EOF_mask)
     
    566571       
    567572        (NullErrors, NullSpans) = validate_null(NullStarts, Lex)
    568        
    569         return (ObjectStarts, ArrayStarts,
    570                 StringStarts, StringSpans, StringErrors,
     573
     574        return (StringStarts, StringSpans, StringErrors,
    571575                NumberStarts, NumberSpans, NumberErrors,
    572576                TrueStarts, TrueSpans, TrueErrors,
     
    574578                NullStarts, NullSpans, NullErrors)
    575579 
    576 def demo_validate_json(u8data):
    577  
     580def demo_validate_json_atoms(u8data):
    578581        global lgth
    579582        lgth = len(u8data)
    580        
    581         (ObjectStarts, ArrayStarts,
    582         StringStarts, StringSpans, StringErrors,
     583
     584        (StringStarts, StringSpans, StringErrors,
    583585        NumberStarts, NumberSpans, NumberErrors,
    584586        TrueStarts, TrueSpans, TrueErrors,
    585587        FalseStarts, FalseSpans, FalseErrors,
    586         NullStarts, NullSpans, NullErrors) = validate_json(u8data,lgth)
     588        NullStarts, NullSpans, NullErrors) = validate_json_atoms(u8data)
    587589
    588590        bitutil.print_aligned_streams([('Input Data', u8data),
    589                               ('ObjectStarts', bitutil.bitstream2string(ObjectStarts, lgth)),
    590                               ('ArrayStarts', bitutil.bitstream2string(ArrayStarts, lgth)),
    591591                              ('StringStarts', bitutil.bitstream2string(StringStarts, lgth)),
    592592                              ('StringSpans', bitutil.bitstream2string(StringSpans, lgth)),
     
    616616
    617617        u8data = bitutil.readfile(sys.argv[1])
     618
    618619#       demo_parse_escape(u8data)
    619620#       demo_parallel_prefix_parity(u8data)
    620 #       demo_value_starts(u8data)
     621#       demo_atom_starts(u8data)
    621622#       demo_validate_number(u8data)
    622623#       demo_validate_string(u8data)
    623         demo_validate_json(u8data)
     624        demo_validate_json_atoms(u8data)
Note: See TracChangeset for help on using the changeset viewer.