Changeset 2911 for proto/PDF


Ignore:
Timestamp: Feb 19, 2013, 2:23:46 PM (6 years ago)
Author: lindanl
Message: Add token generation

Location: proto/PDF
Files: 2 edited

Legend:

Unmodified
Added
Removed
  • proto/PDF/cb_pablo.py

    r2908 r2911  
    5656class Out_Callouts():
    5757    mask = 0
    58     comment = 0
     58    hex_opener = 0
    5959    zeromask = 0
    6060    delmask = 0
     
    7171   
    7272class marker():
    73     hex_opener = 0
    74     name_starts = 0
    75     numeric_starts = 0
    76     instring = 0
     73    starts = 0
     74    ends = 0
    7775    error = 0
    7876   
     
    236234       
    237235def Parse_Comment(lex, escape_Callouts, marker, out_Callouts):
     236  out_Callouts.mask = 0
     237  if lex.Percent:
    238238        knownParen = 0
    239239        CtCand = lex.Percent
     
    257257            knownNonCtReg |= pscanReg
    258258          newParen = knownNonCtReg & (escape_Callouts.LParen | escape_Callouts.RParen)
    259         out_Callouts.comment = CtCand &~ knownNonCtReg
    260         out_Callouts.mask = pablo.InclusiveSpan(out_Callouts.comment, pablo.ScanTo(out_Callouts.comment, lex.EOL))
     259        comment = CtCand &~ knownNonCtReg
     260        out_Callouts.mask = pablo.InclusiveSpan(comment, pablo.ScanTo(comment, lex.EOL))
    261261
    262262def Parse_String(escape_Callouts, marker, out_Callouts):
     
    267267        pscan = pablo.ScanTo(pablo.Advance(escape_Callouts.LParen), escape_Callouts.LParen | escape_Callouts.RParen)
    268268        qscan = pablo.ScanTo(pablo.Advance(escape_Callouts.RParen), escape_Callouts.LParen | escape_Callouts.RParen)
    269         marker.instring = pablo.ExclusiveSpan(escape_Callouts.LParen, pscan)
     269        instring = pablo.ExclusiveSpan(escape_Callouts.LParen, pscan)
    270270        closed = pscan & escape_Callouts.RParen
    271271        unclosed = pscan & escape_Callouts.LParen | qscan & escape_Callouts.RParen
     
    275275                pscan = pablo.ScanTo(pablo.Advance(unclosed & escape_Callouts.LParen), unclosed)
    276276                qscan = pablo.ScanTo(pablo.Advance(unclosed & escape_Callouts.RParen), unclosed)
    277                 marker.instring |= pablo.SpanUpTo(unclosed & escape_Callouts.LParen, pscan)
     277                instring |= pablo.SpanUpTo(unclosed & escape_Callouts.LParen, pscan)
    278278                closed = pscan & escape_Callouts.RParen
    279279                unclosed = pscan & escape_Callouts.LParen | qscan & escape_Callouts.RParen
     
    283283        # Any closing paren that was not actually used to close
    284284        # an opener is in error.
    285         out_Callouts.mask |= marker.instring
     285        out_Callouts.mask |= instring
     286        marker.starts = pablo.Advance(~instring)&instring
     287        marker.ends = pablo.ScanThru(marker.starts, instring)
    286288        marker.error |= escape_Callouts.RParen &~ all_closed       
    287289       
     
    293295        out_Callouts.hexString_mask = pablo.InclusiveSpan(hexString_starts,hexString_ends)
    294296        out_Callouts.mask |= out_Callouts.hexString_mask
    295         marker.hex_opener = hexString_starts
     297        out_Callouts.hex_opener = hexString_starts
     298        marker.starts |= hexString_starts
     299        marker.ends |= hexString_ends
    296300
    297301def Parse_Names(lex, marker, out_Callouts) :
     
    300304        out_Callouts.names_escapes = lex.Hash & pablo.Lookahead(lex.Hex) & pablo.Lookahead(lex.Hex,2)   
    301305        out_Callouts.mask |= pablo.InclusiveSpan(name_starts,names_follows)
    302         marker.name_starts = name_starts
     306        marker.starts |= name_starts
     307        marker.ends |= names_follows
    303308       
    304309def Parse_Numeric(lex, marker, out_Callouts) :
     
    306311        numeric_starts = (numeric_characters &~ pablo.Advance(lex.Regular)) &~ out_Callouts.mask
    307312        numeric_follows = pablo.ScanThru(numeric_starts, lex.Regular)
    308         marker.numeric_starts = numeric_starts 
     313        marker.starts |= numeric_starts
     314        marker.ends |= numeric_follows
    309315
    310316       
     
    341347       
    342348    if out_Callouts.hexString_mask:
    343         hexsting_partial_start = marker.hex_opener
     349        hexsting_partial_start = out_Callouts.hex_opener
    344350        hexsting_partial_odd_start = hexsting_partial_start & parity.odd
    345351        hexsting_partial_even_start = hexsting_partial_start & parity.even
  • proto/PDF/cb_template.cpp

    r2908 r2911  
    1919using namespace std;
    2020#include <iostream>
    21 
     21#include <vector>
    2222
    2323#ifdef BUFFER_PROFILING
     
    3939int pow_ten[8] = {1,10,100,1000,10000,100000,1000000,10000000};
    4040
    41 
    4241static inline int Digit_postprocessing(char * source, int pos);
    4342
     
    4847
    4948void do_process(FILE *infile, FILE *outfile, int filesize);
     49
     50enum PDFTokenType {
     51  str_token,
     52  hex_token,
     53  name_token,
     54  int_token,
     55  flt_token,
     56  kw_token
     57};
     58
     59union PDFToken { char * char_ptr; int idx;};
     60
     61typedef struct tokenStruct{
     62  union PDFToken token;
     63  int len;
     64  enum PDFTokenType type;
     65} TokenStruct;
     66
     67TokenStruct tokenArray[1000000];
     68int token_idx=0;
    5069
    5170#define MAX_NUM 1000000
     
    5372int dec_pl[MAX_NUM];
    5473int num_idx=0;
    55 
    5674
    5775static inline int Digit_postprocessing(char * source, int pos) {
     
    7795    if (negative) (num) = -(num);
    7896    numbers[num_idx] = num;
    79     if(start_pos!=-1)
     97    if(start_pos!=-1){
    8098      dec_pl[num_idx] = pow_ten[pos - start_pos];
    81     else
     99      tokenArray[token_idx].type = flt_token;
     100      tokenArray[token_idx].token.idx = num_idx;
     101    }
     102    else{
    82103      dec_pl[num_idx] = 1;
     104      tokenArray[token_idx].type = int_token;
     105      tokenArray[token_idx].token.idx = num_idx;
     106    }
    83107    num_idx++;
    84108}
     
    213237}
    214238
    215 static inline void Build_ContentBuffer(Out_Callouts out_Callouts, char * content_buf, char ** content_buf_ptr){
     239static inline void Build_ContentBuffer(Out_Callouts out_Callouts, Marker & marker, char * content_buf, char ** content_buf_ptr, int * del_sum, int blk){
    216240   
    217241    BytePack S[8];
    218    
     242
    219243    if (bitblock::any(out_Callouts.delmask)) {
    220244            BitBlock shift1, shift2, shift4, shift8;
     
    228252            do_right16_shifts(out_Callouts.bit_6, shift1, shift2, shift4, shift8);
    229253            do_right16_shifts(out_Callouts.bit_7, shift1, shift2, shift4, shift8);
     254            do_right16_shifts(marker.starts, shift1, shift2, shift4, shift8);
     255            do_right16_shifts(marker.ends, shift1, shift2, shift4, shift8);
    230256        }
    231257
     
    238264        for(int k=0; k<8; k++) units_per_reg.i8[k] = 0;
    239265        del_count(out_Callouts.delmask,units_per_reg.i128);
    240 //      for(int k=0; k<8; k++)
    241 //        printf("delcount=%i\n",units_per_reg.i8[k]);
     266        for(int k=0; k<8; k++) {
     267            del_sum[blk*8+k+1] = del_sum[blk*8+k] + (16-units_per_reg.i8[k]);
     268        }
    242269
    243270        for(int j=0; j<8; j++){
     
    247274}
    248275
    249 static inline void Postprocessing(char* src, Marker marker){   
    250  
    251     if(bitblock::any(marker.numeric_starts)){
     276static inline void Postprocessing(char* cb, int cb_blocks, Marker * marker, int * del_sum){   
     277     
     278  for(int i=0; i<cb_blocks; i++){
     279    if(bitblock::any(marker[i].starts)){
    252280      BitBlockForwardIterator iter;
    253       iter.init(&(marker.numeric_starts));
     281      iter.init(&(marker[i].starts));
    254282      BitBlockForwardIterator iter_end;
    255283      while(iter != iter_end) {
    256           Digit_postprocessing(src, *iter);
    257           iter++;
     284        int pos = i*BLOCK_SIZE + (*iter);
     285        pos = pos - del_sum[pos/16];
     286        if (cb[pos]== 0x2f){
     287          tokenArray[token_idx].type = name_token;
     288          tokenArray[token_idx].token.char_ptr = &cb[pos];
     289        }
     290        else if(pos>0 && cb[pos-1]== 0x28){
     291          tokenArray[token_idx].type = str_token;
     292          tokenArray[token_idx].token.char_ptr = &cb[pos];
     293        }
     294        else if (cb[pos]== 0x0c){
     295          tokenArray[token_idx].type = hex_token;
     296          tokenArray[token_idx].token.char_ptr = &cb[pos];
     297        }
     298        else if (cb[pos]== '-' || cb[pos]== '+' || (cb[pos]>='0'&&cb[pos]<='9')){
     299          Digit_postprocessing(cb, pos);
     300        }
     301        iter++;
     302        token_idx++;
    258303      }
    259     }
     304    }
     305  }
    260306}
    261307
     
    274320  struct Out_Callouts out_Callouts;
    275321
    276   struct Marker marker;
     322  struct Marker * marker = (struct Marker *)malloc(sizeof(struct Marker)*BLOCKS);
    277323
    278324  int block_base = 0;
     
    282328  char * content_buf = (char*)malloc(filesize);
    283329  char * content_buf_ptr =  content_buf;
     330  int del_sum[BLOCKS*8];
     331  del_sum[0] = 0;
    284332
    285333  parity.odd = simd<2>::constant<1>();
     
    302350    for (int i = 0; i < BLOCKS-1; i++){
    303351      parse_Escaped.do_block(lex[i], parity, escape_Callouts, out_Callouts, lex[i+1]);
    304       parse_Comment.do_block(lex[i], escape_Callouts, marker, out_Callouts);
    305       parse_String.do_block(escape_Callouts, marker, out_Callouts);
    306       parse_HexStrings.do_block(lex[i], marker, out_Callouts);
    307       parse_Names.do_block(lex[i], marker, out_Callouts, lex[i+1]);
    308       parse_Numeric.do_block(lex[i], marker, out_Callouts);
    309       prepare_content_buffer.do_block(basis_bits[i], lex[i], marker, parity, escape_Callouts, out_Callouts, lex[i+1]);
    310       Build_ContentBuffer(out_Callouts, content_buf, &content_buf_ptr);
    311       Postprocessing(&srcbuf[i*BLOCK_SIZE], marker);
     352      parse_Comment.do_block(lex[i], escape_Callouts, marker[i], out_Callouts);
     353      parse_String.do_block(escape_Callouts, marker[i], out_Callouts);
     354      parse_HexStrings.do_block(lex[i], marker[i], out_Callouts);
     355      parse_Names.do_block(lex[i], marker[i], out_Callouts, lex[i+1]);
     356      parse_Numeric.do_block(lex[i], marker[i], out_Callouts);
     357      prepare_content_buffer.do_block(basis_bits[i], lex[i], marker[i], parity, escape_Callouts, out_Callouts, lex[i+1]);
     358      Build_ContentBuffer(out_Callouts, marker[i], content_buf, &content_buf_ptr, del_sum, i);
    312359    }
    313360   
     
    315362    EOF_mask = bitblock::srl(simd<1>::constant<1>(), convert(BLOCK_SIZE-chars_avail));
    316363    parse_Escaped.do_final_block(lex[BLOCKS-1], parity, escape_Callouts, out_Callouts, EOF_mask);
    317     parse_Comment.do_final_block(lex[BLOCKS-1], escape_Callouts, marker, out_Callouts, EOF_mask);
    318     parse_String.do_final_block(escape_Callouts, marker, out_Callouts, EOF_mask);
    319     parse_HexStrings.do_final_block(lex[BLOCKS-1], marker, out_Callouts, EOF_mask);
    320     parse_Names.do_final_block(lex[BLOCKS-1], marker, out_Callouts, EOF_mask);
    321     parse_Numeric.do_block(lex[BLOCKS-1], marker, out_Callouts);
    322     prepare_content_buffer.do_final_block(basis_bits[BLOCKS-1], lex[BLOCKS-1], marker, parity, escape_Callouts, out_Callouts, EOF_mask);
     364    parse_Comment.do_final_block(lex[BLOCKS-1], escape_Callouts, marker[BLOCKS-1], out_Callouts, EOF_mask);
     365    parse_String.do_final_block(escape_Callouts, marker[BLOCKS-1], out_Callouts, EOF_mask);
     366    parse_HexStrings.do_final_block(lex[BLOCKS-1], marker[BLOCKS-1], out_Callouts, EOF_mask);
     367    parse_Names.do_final_block(lex[BLOCKS-1], marker[BLOCKS-1], out_Callouts, EOF_mask);
     368    parse_Numeric.do_final_block(lex[BLOCKS-1], marker[BLOCKS-1], out_Callouts, EOF_mask);
     369    prepare_content_buffer.do_final_block(basis_bits[BLOCKS-1], lex[BLOCKS-1], marker[BLOCKS-1], parity, escape_Callouts, out_Callouts, EOF_mask);
    323370    out_Callouts.delmask |= ~EOF_mask;         
    324     Build_ContentBuffer(out_Callouts, content_buf, &content_buf_ptr);
    325     Postprocessing(&srcbuf[(BLOCKS-1)*BLOCK_SIZE], marker);
     371    Build_ContentBuffer(out_Callouts, marker[BLOCKS-1], content_buf, &content_buf_ptr, del_sum, BLOCKS-1);
     372   
     373
     374    Postprocessing(content_buf, (content_buf_ptr-content_buf)/BLOCK_SIZE+1, marker, del_sum);
     375
    326376
    327377//      print_register("marker.error",marker.error);
    328    
     378//    
    329379//     for(int i=0;i<num_idx;i++)
    330380//       printf("%i,%i\n",numbers[i],dec_pl[i]);
     381   
     382//     for(int i=0; i<10;i++){
     383//       if(tokenArray[i].type==str_token)
     384//      printf("String: %s\n",tokenArray[i].token.char_ptr);
     385//   
     386//       if(tokenArray[i].type==hex_token)
     387//      printf("Hex: %s\n",tokenArray[i].token.char_ptr);
     388//       
     389//       if(tokenArray[i].type==name_token)
     390//      printf("Name: %s\n",tokenArray[i].token.char_ptr);
     391//       
     392//       if(tokenArray[i].type==int_token)
     393//      printf("Number: %i\n",numbers[tokenArray[i].token.idx]);
     394//       
     395//       if(tokenArray[i].type==flt_token)
     396//      printf("Number: %i\n",numbers[tokenArray[i].token.idx]);
     397//
     398//     }
     399     
    331400 
    332401    PERF_SEC_END(parser_timer, chars_read);
Note: See TracChangeset for help on using the changeset viewer.