Changeset 2896 for proto


Ignore:
Timestamp:
Feb 3, 2013, 5:09:09 PM (7 years ago)
Author:
lindanl
Message:

Name Parsing using Lookahead

Location:
proto/PDF
Files:
2 edited

Legend:

Unmodified
Added
Removed
  • proto/PDF/cb_pablo.py

    r2859 r2896  
    3939        Oct = 0
    4040        Nonoct = 0
     41        Regular = 0
    4142       
    4243class Parity():
     
    5758    delmask = 0
    5859    hexString_mask = 0
     60    names_escapes = 0
    5961    bit_0 = 0
    6062    bit_1 = 0
     
    6870class marker():
    6971    hex_opener = 0
     72    name_starts = 0
     73    numeric_starts = 0
     74   
    7075   
    7176
     
    186191        lex.Oct = (temp39 &~ basis_bits.bit_4)
    187192        lex.Nonoct = (temp16 | temp19)
     193        lex.Regular = ~ (lex.Special & lex.WS)
    188194
    189195       
     
    212218        #escape_Callouts.Oct3 = lex.Oct & pablo.Advance(escape_Callouts.Oct2)
    213219        out_Callouts.delmask = escape_char
     220
     221#def Parse_CommentsLiteralStrings(lex, escaped_Callouts, marker, comments_Callouts, literalStrings_Callouts) :
     222        ##not entirely sure this code is correct once compiled
     223        #depth = 0
     224        #comment_starts=0
     225        #comment_ends=0
     226        #marker.starts = 0
     227        #marker.others = 0
     228        #literalStrings_Callouts.starts = 0
     229        #literalStrings_Callouts.ends = 0
     230
     231        #scan_characters = escaped_Callouts.UELParen | escaped_Callouts.UERParen | lex.Percent
     232        #cursor = pablo.ScanToFirst(scan_characters)
     233        #while pablo.inFile(cursor) :
     234                #comment_cursor = cursor & lex.Percent
     235                #string_cursor = cursor & (escaped_Callouts.UELParen|escaped_Callouts.UERParen)
     236                #if comment_cursor :
     237                        #comment_starts |=comment_cursor
     238                        #comment_cursor = pablo.ScanTo(comment_cursor, lex.EOL)
     239                        #comment_ends |=comment_cursor
     240                        #cursor = pablo.ScanTo(pablo.Advance(comment_cursor), scan_characters)
     241               
     242                #if string_cursor :
     243                        ##There should be a better way to do this
     244                        #if (string_cursor & escaped_Callouts.UELParen) :
     245                                #if is_zero():
     246                                        #literalStrings_Callouts.starts|=string_cursor
     247
     248                                #inc()
     249                        #if (string_cursor & escaped_Callouts.UELParen) :
     250                                #cursor = pablo.ScanTo(pablo.Advance(string_cursor), escaped_Callouts.UELParen | escaped_Callouts.UERParen)
     251                        #if (string_cursor & escaped_Callouts.UERParen) :
     252                                #dec()
     253                        #if (string_cursor & escaped_Callouts.UERParen) :
     254                                #if is_zero():
     255                                        #literalStrings_Callouts.ends|=string_cursor
     256                                        #cursor = pablo.ScanTo(pablo.Advance(string_cursor), scan_characters)
     257                                #if lt_zero():
     258                                        #literalStrings_Callouts.error|=string_cursor
     259                                        #cursor = pablo.ScanTo(pablo.Advance(string_cursor), scan_characters)
     260                                #if ~(is_zero() | lt_zero()):
     261                                        #cursor = pablo.ScanTo(pablo.Advance(string_cursor), escaped_Callouts.UELParen | escaped_Callouts.UERParen)
     262                                       
     263
     264        #comments_Callouts.mask = pablo.SpanUpTo(comment_starts, comment_ends)
     265        #literalStrings_Callouts.mask = pablo.InclusiveSpan(literalStrings_Callouts.starts,literalStrings_Callouts.ends)
     266        #literalStrings_Callouts.escapes = escaped_Callouts.escapes & literalStrings_Callouts.mask
     267        #literalStrings_Callouts.carriage_returns = lex.CR & literalStrings_Callouts.mask &~ escaped_Callouts.escaped
     268        #marker.mask = comments_Callouts.mask | literalStrings_Callouts.mask
     269
    214270       
    215271def Parse_HexStrings(lex, marker, out_Callouts) :
     
    219275        out_Callouts.hexString_mask = pablo.InclusiveSpan(hexString_starts,hexString_ends)
    220276        marker.hex_opener = hexString_starts
    221         out_Callouts.delmask |= out_Callouts.hexString_mask & lex.WS
    222 
    223 ##def Parse_Names(lex, marker, out_Callouts) :
    224 ##      name_starts =lex.Slash #&~marker.mask
    225 ##      names_follows = pablo.ScanThru(pablo.Advance(name_starts), lex.Regular)
    226 ##      names_Callouts.escapes = (pablo.Advance(pablo.Advance(lex.Hash)) & pablo.Advance(lex.Hex) & lex.Hex) & pablo.SpanUpTo(names_starts, names_follows)
    227 ##      marker.name_starts = name_starts
    228 ##
    229 ##     
    230 ##def Parse_Numeric(lex, marker, out_Callouts) :
    231 ##      numeric_characters = (lex.Digit | lex.Period | lex.Sign)
    232 ##      numeric_starts = (numeric_characters &~ pablo.Advance(lex.Regular)) &~ marker.mask
    233 ##      numeric_follows = pablo.ScanThru(numeric_starts, lex.Regular)
    234 ##      marker.numeric_starts = numeric_starts
     277
     278def Parse_Names(lex, marker, out_Callouts) :
                # Records name-start positions and Hash escape positions.
                # Reads lex.Slash, lex.Regular, lex.Hash and lex.Hex; writes
                # out_Callouts.names_escapes and marker.name_starts.
                # Every Slash position is taken as a name start; the exclusion of
                # marker.mask is currently commented out.
     279        name_starts =lex.Slash #&~marker.mask
                # NOTE(review): names_follows is computed but not used anywhere in
                # this function - confirm whether it is still needed.
     280        names_follows = pablo.ScanThru(pablo.Advance(name_starts), lex.Regular)
                # Hash positions combined with two Lookahead views of lex.Hex.
                # Presumably Lookahead(s) / Lookahead(s,2) read stream s one / two
                # positions ahead, i.e. a Hash followed by two Hex characters -
                # TODO confirm against the pablo definition.
                # NOTE(review): the result is not restricted to the region between
                # name_starts and names_follows - confirm this is intended.
     281        out_Callouts.names_escapes = lex.Hash & pablo.Lookahead(lex.Hex) & pablo.Lookahead(lex.Hex,2)
     282        marker.name_starts = name_starts
     283       
     284def Parse_Numeric(lex, marker, out_Callouts) :
                # Records candidate numeric-start positions in marker.numeric_starts.
                # Reads lex.Digit, lex.Period, lex.Sign and lex.Regular.
                # out_Callouts is accepted but not read or written here.
     285        numeric_characters = (lex.Digit | lex.Period | lex.Sign)
                # Numeric characters that are not in Advance(lex.Regular) - presumably
                # "not immediately preceded by a Regular character"; TODO confirm the
                # direction of pablo.Advance. The marker.mask exclusion is commented out.
     286        numeric_starts = (numeric_characters &~ pablo.Advance(lex.Regular)) #&~ marker.mask
                # NOTE(review): numeric_follows is computed but not used anywhere in
                # this function - confirm whether it is still needed.
     287        numeric_follows = pablo.ScanThru(numeric_starts, lex.Regular)
     288        marker.numeric_starts = numeric_starts 
     289
    235290       
    236291def Prepare_content_buffer(basis_bits, lex, escape_Callouts, out_Callouts):
    237  
    238     out_Callouts.zeromask |= out_Callouts.delmask
    239 
     292
     293    out_Callouts.bit_0 = basis_bits.bit_0
    240294    out_Callouts.bit_1 = basis_bits.bit_1
    241295    out_Callouts.bit_2 = basis_bits.bit_2
     
    248302   
    249303    if out_Callouts.hexString_mask:
     304     
     305        out_Callouts.delmask |= out_Callouts.hexString_mask & lex.WS
     306       
    250307        NondigitHexString = lex.Hex &~ lex.Digit
    251308        out_Callouts.bit_0 &= ~out_Callouts.hexString_mask
     
    269326        out_Callouts.bit_6 ^= ( lex.pdf_mod_bit_6 & escape_Callouts.Nonoct)
    270327        out_Callouts.bit_7 ^= ( lex.pdf_mod_bit_7 & escape_Callouts.Nonoct)
    271    
    272    
     328       
     329    if out_Callouts.names_escapes:
     330        names_escapes_scope1 = pablo.Advance(out_Callouts.names_escapes)
     331        names_escapes_scope2 = pablo.Advance(names_escapes_scope1)
     332        out_Callouts.delmask |= out_Callouts.names_escapes | names_escapes_scope1
     333        out_Callouts.bit_0 = (out_Callouts.bit_0 &~ names_escapes_scope2) #| pablo.Advance(out_Callouts.bit_4 & names_escapes_scope1)
     334        out_Callouts.bit_1 = (out_Callouts.bit_1 &~ names_escapes_scope2) #| pablo.Advance(out_Callouts.bit_5 & names_escapes_scope1)
     335        out_Callouts.bit_2 = (out_Callouts.bit_2 &~ names_escapes_scope2) #| pablo.Advance(out_Callouts.bit_6 & names_escapes_scope1)
     336        out_Callouts.bit_3 = (out_Callouts.bit_3 &~ names_escapes_scope2) #| pablo.Advance(out_Callouts.bit_7 & names_escapes_scope1)
     337        out_Callouts.bit_0 |= pablo.Advance(out_Callouts.bit_4 & names_escapes_scope1)
     338        out_Callouts.bit_1 |= pablo.Advance(out_Callouts.bit_5 & names_escapes_scope1)
     339        out_Callouts.bit_2 |= pablo.Advance(out_Callouts.bit_6 & names_escapes_scope1)
     340        out_Callouts.bit_3 |= pablo.Advance(out_Callouts.bit_7 & names_escapes_scope1)
     341     
     342 
     343    out_Callouts.zeromask |= out_Callouts.delmask 
    273344    out_Callouts.bit_0 = out_Callouts.bit_0 &~ out_Callouts.zeromask   
    274345    out_Callouts.bit_1 = out_Callouts.bit_1 &~ out_Callouts.zeromask
  • proto/PDF/cb_template.cpp

    r2859 r2896  
    1717#include "../../simd-lib/perflib/perfsec.h"
    1818
    19 #define BUFSIZE 1000000
     19
    2020
    2121#ifdef BUFFER_PROFILING
     
    4747static inline void s2p_do_final_block(BytePack U8[], Basis_bits & basis_bits, BitBlock EOF_mask);
    4848
    49 void do_process(FILE *infile, FILE *outfile);
     49void do_process(FILE *infile, FILE *outfile, int filesize);
     50
     51
    5052
    5153int main(int argc, char * argv[]) {
     
    8284        PERF_SEC_INIT(parser_timer);
    8385
    84         do_process(infile, outfile);
     86        do_process(infile, outfile, fileinfo.st_size+1);
    8587
    8688        PERF_SEC_DUMP(parser_timer);
     
    212214}
    213215
    214 void do_process(FILE *infile, FILE *outfile) {
    215 
    216 @decl
     216void do_process(FILE *infile, FILE *outfile, int filesize) {
     217 
     218  int BLOCKS = filesize/BLOCK_SIZE+1;
     219
     220  struct Basis_bits * basis_bits = (struct Basis_bits *)malloc(sizeof(struct Basis_bits)*BLOCKS);
     221
     222  struct Lex * lex = (struct Lex *)malloc(sizeof(struct Lex)*BLOCKS);
     223 
     224  struct Parity parity;
     225
     226  struct Escape_Callouts escape_Callouts;
     227
     228  struct Out_Callouts out_Callouts;
     229
     230  struct Marker marker;
    217231
    218232  int block_base = 0;
    219233  int chars_read = 0;
    220234  int chars_avail = 0;
    221   char srcbuf[BUFSIZE];
    222   char content_buf[BUFSIZE];
     235  char * srcbuf = (char*)malloc(filesize);
     236  char * content_buf = (char*)malloc(filesize);
    223237  char * content_buf_ptr =  content_buf;
    224238
     
    227241 
    228242
    229   chars_read = fread((void *)srcbuf, 1, BUFSIZE, infile);
     243  chars_read = fread((void *)srcbuf, 1, filesize, infile);
    230244  chars_avail = chars_read;
    231245
     
    234248
    235249    PERF_SEC_START(parser_timer);
    236 
    237 /* Full Blocks */
     250   
     251    for (int i = 0; i < BLOCKS; i++){
     252      s2p_do_block((BytePack *) &srcbuf[i*BLOCK_SIZE], basis_bits[i]);
     253      classify_bytes.do_block(basis_bits[i], lex[i]);
     254    }
     255   
     256    for (int i = 0; i < BLOCKS-1; i++){
     257      parse_Escaped.do_block(lex[i], parity, escape_Callouts, out_Callouts);
     258      parse_HexStrings.do_block(lex[i], marker, out_Callouts);
     259      parse_Names.do_block(lex[i], marker, out_Callouts,lex[i+1]);
     260      prepare_content_buffer.do_block(basis_bits[i], lex[i], escape_Callouts, out_Callouts);
     261      Build_ContentBuffer(out_Callouts, content_buf, &content_buf_ptr);
     262    }
     263    /*final block*/
     264    EOF_mask = bitblock::srl(simd<1>::constant<1>(), convert(BLOCK_SIZE-chars_avail));
     265    parse_Escaped.do_final_block(lex[BLOCKS-1], parity, escape_Callouts, out_Callouts, EOF_mask);
     266    parse_HexStrings.do_final_block(lex[BLOCKS-1], marker, out_Callouts, EOF_mask);
     267    parse_Names.do_final_block(lex[BLOCKS-1], marker, out_Callouts, EOF_mask);
     268    prepare_content_buffer.do_final_block(basis_bits[BLOCKS-1], lex[BLOCKS-1], escape_Callouts, out_Callouts, EOF_mask);
     269    out_Callouts.delmask |= ~EOF_mask;         
     270    Build_ContentBuffer(out_Callouts, content_buf, &content_buf_ptr);
     271/*
     272
    238273    while (chars_avail >= BLOCK_SIZE) {
    239274          s2p_do_block((BytePack *) &srcbuf[block_base], basis_bits);
     
    243278          chars_avail -= BLOCK_SIZE;
    244279    }
    245 /*Partial Blocks*/
     280
    246281    if (chars_avail > 0 || @any_carry) {
    247282          EOF_mask = bitblock::srl(simd<1>::constant<1>(), convert(BLOCK_SIZE-chars_avail));
     
    251286          Build_ContentBuffer(out_Callouts, content_buf, &content_buf_ptr);
    252287    }
    253    
     288*/   
    254289    PERF_SEC_END(parser_timer, chars_read);
    255290   
Note: See TracChangeset for help on using the changeset viewer.