Changeset 2562 for proto/PDF


Ignore:
Timestamp: Oct 26, 2012, 12:04:30 PM (6 years ago)
Author: bhull
Message: Test data

Location: proto/PDF
Files: 45 added, 1 edited

Legend:

Unmodified
Added
Removed
  • proto/PDF/pdf_demo.py

    r2559 r2562  
    4848class Names_Callouts() :
    4949        starts = 0
    50         ends = 0
     50        follows = 0
    5151
    5252class HexStrings_Callouts() :
     
    160160
    161161
    162 def GetEven(input):
    163         #ugly temporary version
    164         even = 2
    165         while (even <=input*2):
    166                         even = even * 4 + 2
    167         return even
    168 
    169 def GetOdd(input):
    170         #ugly temporary version
    171         odd = 1
    172         while (odd <=input*2):
    173                         odd = odd * 4 + 1
    174         return odd
    175 
    176 
    177 def Parse_Escaped(lex, Escaped_Callouts):
    178         odd = GetOdd(lex.Backslash)
    179         even = GetEven(lex.Backslash)
    180                    
     162
     163def simd_const_4(hexdigit, EOF_mask):
     164        lgth = pablo.count_leading_zeroes(~EOF_mask)/4
     165        return int(hexdigit*(lgth+1),16)&EOF_mask
     166
     167
     168def Parse_Escaped(lex, escaped_Callouts):
     169
     170        odd = simd_const_4('a',pablo.EOF_mask) 
     171        even = simd_const_4('5',pablo.EOF_mask)
     172
    181173        start = lex.Backslash &~ pablo.Advance(lex.Backslash)
    182174        even_start = start & even
     
    189181        escape = escape | (odd_final & even)
    190182                   
    191         Escaped_Callouts.UELParen = lex.LParen &~ escape
    192         Escaped_Callouts.UERParen = lex.RParen &~ escape
     183        escaped_Callouts.UELParen = lex.LParen &~ escape
     184        escaped_Callouts.UERParen = lex.RParen &~ escape
    193185
    194186               
     
    234226
    235227
    236 def Parse_CommentsLiteralStrings(lex, Escaped_Callouts, Marker, Comments_Callouts, LiteralStrings_Callouts) :
     228def Parse_CommentsLiteralStrings(lex, escaped_Callouts, marker, comments_Callouts, literalStrings_Callouts) :
    237229        #not entirely sure this code is correct once compiled
    238230        depth = 0
    239231        comment_starts=0
    240         comment_ends=0
    241         scan_characters = Escaped_Callouts.UELParen |Escaped_Callouts.UERParen | lex.Percent
     232        comment_follows=0
     233        scan_characters = escaped_Callouts.UELParen |escaped_Callouts.UERParen | lex.Percent
    242234        cursor = pablo.ScanToFirst(scan_characters)
    243         while cursor & pablo.EOF_mask :
     235        while pablo.inFile(cursor) :
    244236                comment_cursor = cursor & lex.Percent
    245                 string_cursor = cursor & (Escaped_Callouts.UELParen|Escaped_Callouts.UERParen)
     237                string_cursor = cursor & (escaped_Callouts.UELParen|escaped_Callouts.UERParen)
    246238                if comment_cursor :
    247239                        comment_starts |=comment_cursor
    248240                        comment_cursor = pablo.ScanTo(comment_cursor, lex.EOL)
    249                         comment_ends |=comment_cursor
     241                        comment_follows |=comment_cursor
    250242                        cursor = pablo.ScanTo(pablo.Advance(comment_cursor), scan_characters)
     243   
     244
    251245               
    252246                if string_cursor :
    253                         if (string_cursor & Escaped_Callouts.UELParen) :
     247                        if (string_cursor & escaped_Callouts.UELParen) :
    254248                                if depth==0:
    255                                         LiteralStrings_Callouts.starts|=string_cursor
     249                                        literalStrings_Callouts.starts|=string_cursor
     250
    256251                                depth+=1
    257                                 cursor = pablo.ScanTo(pablo.Advance(string_cursor), Escaped_Callouts.UELParen | Escaped_Callouts.UERParen)
    258                         if (string_cursor & Escaped_Callouts.UERParen) :
     252
     253                                cursor = pablo.ScanTo(pablo.Advance(string_cursor), escaped_Callouts.UELParen | escaped_Callouts.UERParen)
     254                        if (string_cursor & escaped_Callouts.UERParen) :
     255
    259256                                depth-=1
     257
    260258                                if depth==0:
    261                                         LiteralStrings_Callouts.ends|=string_cursor
     259                                        literalStrings_Callouts.ends|=string_cursor
    262260                                        cursor = pablo.ScanTo(pablo.Advance(string_cursor), scan_characters)
    263261                                elif depth==-1:
    264                                         LiteralStrings_Callouts.error|=string_cursor
     262                                        literalStrings_Callouts.error|=string_cursor
    265263                                        cursor = pablo.ScanTo(pablo.Advance(string_cursor), scan_characters)
    266264                                else:
    267                                         cursor = pablo.ScanTo(pablo.Advance(string_cursor), Escaped_Callouts.UELParen | Escaped_Callouts.UERParen)
     265                                        cursor = pablo.ScanTo(pablo.Advance(string_cursor), escaped_Callouts.UELParen | escaped_Callouts.UERParen)
    268266                                       
    269267               
     
    271269        #if (depth>0):
    272270                #error
    273         Comments_Callouts.mask = (comment_ends - comment_starts) | comment_ends
    274         LiteralStrings_Callouts.mask = (LiteralStrings_Callouts.ends-LiteralStrings_Callouts.starts) | LiteralStrings_Callouts.ends
    275         Marker.mask = Comments_Callouts.mask | LiteralStrings_Callouts.mask
    276 
    277 
    278 def Parse_Names(lex, Marker, Names_Callouts) :
    279         Names_Callouts.starts = pablo.Advance(lex.Slash&~Marker.mask)
    280         Names_Callouts.follows = pablo.ScanThru(Names_Callouts.starts, lex.Regular)
    281         Marker.mask |=Names_Callouts.starts
    282 
    283 def Parse_HexStrings(lex, Marker, HexStrings_Callouts) :
     271        comments_Callouts.mask = (comment_follows - comment_starts) | comment_follows
     272        literalStrings_Callouts.mask = (literalStrings_Callouts.ends-literalStrings_Callouts.starts) | literalStrings_Callouts.ends
     273        marker.mask = comments_Callouts.mask | literalStrings_Callouts.mask
     274
     275
     276def Parse_Names(lex, marker, names_Callouts) :
     277        names_Callouts.starts = pablo.Advance(lex.Slash&~marker.mask)
     278        names_Callouts.follows = pablo.ScanThru(names_Callouts.starts, lex.Regular)
     279        marker.mask |=names_Callouts.starts
     280
     281def Parse_HexStrings(lex, marker, hexStrings_Callouts) :
    284282        #Haven't confirmed that this is the correct behaviour with bad input
    285         HexStrings_Callouts.starts = lex.LAngle &~Marker.mask
    286         HexStrings_Callouts.ends = pablo.ScanThru(pablo.Advance(HexStrings_Callouts.starts),lex.Hex|lex.WS)
    287         HexStrings_Callouts.error = HexStrings_Callouts.ends &~ lex.RAngle
    288         Marker.mask |= HexStrings_Callouts.ends - HexStrings_Callouts.starts
    289 
    290 def Parse_Numeric(lex, Marker, Numeric_Callouts) :
     283        hexStrings_Callouts.starts = lex.LAngle &~marker.mask
     284        hexStrings_Callouts.ends = pablo.ScanThru(pablo.Advance(hexStrings_Callouts.starts),lex.Hex|lex.WS)
     285        hexStrings_Callouts.error = hexStrings_Callouts.ends &~ lex.RAngle
     286        marker.mask |= hexStrings_Callouts.ends - hexStrings_Callouts.starts
     287
     288def Parse_Numeric(lex, marker, numeric_Callouts) :
    291289        numeric_characters = (lex.Digit | lex.Period | lex.Sign)
    292         Numeric_Callouts.starts =(numeric_characters &~ pablo.Advance(lex.Regular)) &~ Marker.mask
    293         Numeric_Callouts.follows =pablo.ScanThru(Numeric_Callouts.starts, lex.Regular)
    294        
    295         # aftersign = pablo.Advance(Numeric_Callouts.starts & lex.Sign) | (Numeric_Callouts.starts &~ lex.Sign)
     290        numeric_Callouts.starts =(numeric_characters &~ pablo.Advance(lex.Regular)) &~ marker.mask
     291        numeric_Callouts.follows =pablo.ScanThru(numeric_Callouts.starts, lex.Regular)
     292        marker.mask |= numeric_Callouts.starts
     293
     294        # aftersign = pablo.Advance(numeric_Callouts.starts & lex.Sign) | (numeric_Callouts.starts &~ lex.Sign)
    296295        # temp = pablo.ScanThru(aftersign, lex.Digit)
    297296        # afterperiod = pablo.Advance(temp & lex.Period) | (temp &~ lex.Period)
    298         # Numeric_Callouts.ends = pablo.ScanThru(afterperiod, lex.Digit)
    299         # Numeric_Callouts.error = Numeric_Callouts.ends &~ (lex.WS | lex.Delimiter)
    300         # Marker.mask |= Numeric_Callouts.starts
    301 
    302 def Parse_Keywords(lex, Marker, Keywords_Callouts) :
    303         Keywords_Callouts.starts = (lex.Regular &~ pablo.Advance(lex.Regular)) &~ Marker.mask
    304         Keywords_Callouts.ends = pablo.ScanThru(Keywords_Callouts.starts, lex.Regular)
    305        
    306 def Parse_Arrays(lex, Marker, Arrays_Callouts) :
    307         Arrays_Callouts.starts = lex.LBracket &~ Marker.mask
    308         Arrays_Callouts.ends = lex.RBracket &~ Marker.mask
     297        # numeric_Callouts.follows = pablo.ScanThru(afterperiod, lex.Digit)
     298        # numeric_Callouts.error = numeric_Callouts.follows &~ (lex.WS | lex.Delimiter)
     299        # marker.mask |= numeric_Callouts.starts
     300
     301def Parse_Keywords(lex, marker, keywords_Callouts) :
     302        keywords_Callouts.starts = (lex.Regular &~ pablo.Advance(lex.Regular)) &~ marker.mask
     303        keywords_Callouts.follows = pablo.ScanThru(keywords_Callouts.starts, lex.Regular)
     304       
     305def Parse_Arrays(lex, marker, arrays_Callouts) :
     306        arrays_Callouts.starts = lex.LBracket &~ marker.mask
     307        arrays_Callouts.follows = lex.RBracket &~ marker.mask
    309308       
    310309basis_bits = Basis_bits()
    311310lex = Lex()
    312 LiteralStrings_Callouts = LiteralStrings_Callouts()
    313 Escaped_Callouts = Escaped_Callouts()
    314 Comments_Callouts = Comments_Callouts()
    315 Names_Callouts = Names_Callouts()
    316 HexStrings_Callouts = HexStrings_Callouts()
    317 Numeric_Callouts = Numeric_Callouts()
    318 Keywords_Callouts = Keywords_Callouts()
    319 Arrays_Callouts = Arrays_Callouts()
    320 Marker = Marker()
     311literalStrings_Callouts = LiteralStrings_Callouts()
     312escaped_Callouts = Escaped_Callouts()
     313comments_Callouts = Comments_Callouts()
     314names_Callouts = Names_Callouts()
     315hexStrings_Callouts = HexStrings_Callouts()
     316numeric_Callouts = Numeric_Callouts()
     317keywords_Callouts = Keywords_Callouts()
     318arrays_Callouts = Arrays_Callouts()
     319marker = Marker()
    321320
    322321if __name__ == "__main__":
     
    326325                pablo.EOF_mask = pablo.transpose_streams(u8data, basis_bits)
    327326                Classify_bytes(basis_bits, lex)
    328                 Parse_Escaped(lex, Escaped_Callouts)
    329                 Parse_CommentsLiteralStrings(lex, Escaped_Callouts, Marker, Comments_Callouts, LiteralStrings_Callouts)
    330                 Parse_Names(lex, Marker, Names_Callouts)
    331                 Parse_HexStrings(lex, Marker, HexStrings_Callouts)
    332                 Parse_Numeric(lex, Marker, Numeric_Callouts)
    333                 Parse_Keywords(lex, Marker, Keywords_Callouts)
    334                 Parse_Arrays(lex, Marker, Arrays_Callouts)
    335                
    336                 print "data:" + " "*(16-5) + u8data
    337                 print "literal mask" + " "*(16-12) + pablo.bitstream2string(LiteralStrings_Callouts.mask, lgth+1)
    338                 print "literal starts" + " "*(16-14) + pablo.bitstream2string(LiteralStrings_Callouts.starts, lgth+1)
    339                 print "comment mask"+ " "*(16-12) + pablo.bitstream2string(Comments_Callouts.mask, lgth+1)
    340                 print "names starts"+ " "*(16-12) + pablo.bitstream2string(Names_Callouts.starts, lgth+1)
    341                 print "names follows"+ " "*(16-13) + pablo.bitstream2string(Names_Callouts.follows, lgth+1)
    342                 print "hex starts"+ " "*(16-10) + pablo.bitstream2string(HexStrings_Callouts.starts, lgth+1)
    343                 print "hex ends"+ " "*(16-8) + pablo.bitstream2string(HexStrings_Callouts.ends, lgth+1)
    344                 print "num starts"+ " "*(16-10) + pablo.bitstream2string(Numeric_Callouts.starts, lgth+1)
    345                 print "num follows"+ " "*(16-11) + pablo.bitstream2string(Numeric_Callouts.follows, lgth+1)
    346                 print "key starts"+ " "*(16-10) + pablo.bitstream2string(Keywords_Callouts.starts, lgth+1)
    347                 print "key ends"+ " "*(16-8) + pablo.bitstream2string(Keywords_Callouts.ends, lgth+1)
    348                 print "array starts"+ " "*(16-12) + pablo.bitstream2string(Arrays_Callouts.starts, lgth+1)
    349                 print "array ends"+ " "*(16-10) + pablo.bitstream2string(Arrays_Callouts.ends, lgth+1)
    350                 print "regular"+ " "*(16-7) + pablo.bitstream2string(lex.Regular, lgth+1)
    351                
    352 
    353                
    354                 print "errors" + " "*(16-6) + pablo.bitstream2string(LiteralStrings_Callouts.error|HexStrings_Callouts.error | Numeric_Callouts.error, lgth+1)
     327                Parse_Escaped(lex, escaped_Callouts)
     328                Parse_CommentsLiteralStrings(lex, escaped_Callouts, marker, comments_Callouts, literalStrings_Callouts)
     329                Parse_Names(lex, marker, names_Callouts)
     330                Parse_HexStrings(lex, marker, hexStrings_Callouts)
     331                Parse_Numeric(lex, marker, numeric_Callouts)
     332                Parse_Keywords(lex, marker, keywords_Callouts)
     333                Parse_Arrays(lex, marker, arrays_Callouts)
     334               
     335#               print "data:" + " "*(16-5) + u8data
     336#               print "literal starts" + " "*(16-14) + pablo.bitstream2string(literalStrings_Callouts.starts, lgth+1)
     337#               print "literal ends" + " "*(16-12) + pablo.bitstream2string(literalStrings_Callouts.ends, lgth+1)
     338#               print "names starts"+ " "*(16-12) + pablo.bitstream2string(names_Callouts.starts, lgth+1)
     339#               print "names follows"+ " "*(16-13) + pablo.bitstream2string(names_Callouts.follows, lgth+1)
     340#               print "hex starts"+ " "*(16-10) + pablo.bitstream2string(hexStrings_Callouts.starts, lgth+1)
     341#               print "hex ends"+ " "*(16-8) + pablo.bitstream2string(hexStrings_Callouts.ends, lgth+1)
     342#               print "num starts"+ " "*(16-10) + pablo.bitstream2string(numeric_Callouts.starts, lgth+1)
     343#               print "num follows"+ " "*(16-11) + pablo.bitstream2string(numeric_Callouts.follows, lgth+1)
     344#               print "key starts"+ " "*(16-10) + pablo.bitstream2string(keywords_Callouts.starts, lgth+1)
     345#               print "key follows"+ " "*(16-11) + pablo.bitstream2string(keywords_Callouts.follows, lgth+1)
     346#               print "array starts"+ " "*(16-12) + pablo.bitstream2string(arrays_Callouts.starts, lgth+1)
     347#               print "array ends"+ " "*(16-10) + pablo.bitstream2string(arrays_Callouts.ends, lgth+1)
     348#               print "errors" + " "*(16-6) + pablo.bitstream2string(literalStrings_Callouts.error|hexStrings_Callouts.error | numeric_Callouts.error, lgth+1)
     349
     350
     351                print pablo.bitstream2string(literalStrings_Callouts.starts, lgth+1)
     352                print pablo.bitstream2string(literalStrings_Callouts.ends, lgth+1)
     353                print pablo.bitstream2string(names_Callouts.starts, lgth+1)
     354                print pablo.bitstream2string(names_Callouts.follows, lgth+1)
     355                print pablo.bitstream2string(hexStrings_Callouts.starts, lgth+1)
     356                print pablo.bitstream2string(hexStrings_Callouts.ends, lgth+1)
     357                print pablo.bitstream2string(numeric_Callouts.starts, lgth+1)
     358                print pablo.bitstream2string(numeric_Callouts.follows, lgth+1)
     359                print pablo.bitstream2string(keywords_Callouts.starts, lgth+1)
     360                print pablo.bitstream2string(keywords_Callouts.follows, lgth+1)
     361                print pablo.bitstream2string(arrays_Callouts.starts, lgth+1)
     362                print pablo.bitstream2string(arrays_Callouts.ends, lgth+1)
     363                print pablo.bitstream2string(literalStrings_Callouts.error|hexStrings_Callouts.error | numeric_Callouts.error, lgth+1)
    355364               
    356365        else:
    357                 print("Usage: python pdf.py <file>")
    358        
    359 
     366                print("Usage: python pdf_demo.py <file>")
     367       
     368
Note: See TracChangeset for help on using the changeset viewer.