Ignore:
Timestamp:
Feb 19, 2013, 2:23:46 PM (6 years ago)
Author:
lindanl
Message:

Add token generation

File:
1 edited

Legend:

Unmodified
Added
Removed
  • proto/PDF/cb_template.cpp

    r2908 r2911  
    1919using namespace std;
    2020#include <iostream>
    21 
     21#include <vector>
    2222
    2323#ifdef BUFFER_PROFILING
     
    3939int pow_ten[8] = {1,10,100,1000,10000,100000,1000000,10000000};
    4040
    41 
    4241static inline int Digit_postprocessing(char * source, int pos);
    4342
     
    4847
    4948void do_process(FILE *infile, FILE *outfile, int filesize);
     49
     50enum PDFTokenType {
     51  str_token,
     52  hex_token,
     53  name_token,
     54  int_token,
     55  flt_token,
     56  kw_token
     57};
     58
     59union PDFToken { char * char_ptr; int idx;};
     60
     61typedef struct tokenStruct{
     62  union PDFToken token;
     63  int len;
     64  enum PDFTokenType type;
     65} TokenStruct;
     66
     67TokenStruct tokenArray[1000000];
     68int token_idx=0;
    5069
    5170#define MAX_NUM 1000000
     
    5372int dec_pl[MAX_NUM];
    5473int num_idx=0;
    55 
    5674
    5775static inline int Digit_postprocessing(char * source, int pos) {
     
    7795    if (negative) (num) = -(num);
    7896    numbers[num_idx] = num;
    79     if(start_pos!=-1)
     97    if(start_pos!=-1){
    8098      dec_pl[num_idx] = pow_ten[pos - start_pos];
    81     else
     99      tokenArray[token_idx].type = flt_token;
     100      tokenArray[token_idx].token.idx = num_idx;
     101    }
     102    else{
    82103      dec_pl[num_idx] = 1;
     104      tokenArray[token_idx].type = int_token;
     105      tokenArray[token_idx].token.idx = num_idx;
     106    }
    83107    num_idx++;
    84108}
     
    213237}
    214238
    215 static inline void Build_ContentBuffer(Out_Callouts out_Callouts, char * content_buf, char ** content_buf_ptr){
     239static inline void Build_ContentBuffer(Out_Callouts out_Callouts, Marker & marker, char * content_buf, char ** content_buf_ptr, int * del_sum, int blk){
    216240   
    217241    BytePack S[8];
    218    
     242
    219243    if (bitblock::any(out_Callouts.delmask)) {
    220244            BitBlock shift1, shift2, shift4, shift8;
     
    228252            do_right16_shifts(out_Callouts.bit_6, shift1, shift2, shift4, shift8);
    229253            do_right16_shifts(out_Callouts.bit_7, shift1, shift2, shift4, shift8);
     254            do_right16_shifts(marker.starts, shift1, shift2, shift4, shift8);
     255            do_right16_shifts(marker.ends, shift1, shift2, shift4, shift8);
    230256        }
    231257
     
    238264        for(int k=0; k<8; k++) units_per_reg.i8[k] = 0;
    239265        del_count(out_Callouts.delmask,units_per_reg.i128);
    240 //      for(int k=0; k<8; k++)
    241 //        printf("delcount=%i\n",units_per_reg.i8[k]);
     266        for(int k=0; k<8; k++) {
     267            del_sum[blk*8+k+1] = del_sum[blk*8+k] + (16-units_per_reg.i8[k]);
     268        }
    242269
    243270        for(int j=0; j<8; j++){
     
    247274}
    248275
    249 static inline void Postprocessing(char* src, Marker marker){   
    250  
    251     if(bitblock::any(marker.numeric_starts)){
     276static inline void Postprocessing(char* cb, int cb_blocks, Marker * marker, int * del_sum){   
     277     
     278  for(int i=0; i<cb_blocks; i++){
     279    if(bitblock::any(marker[i].starts)){
    252280      BitBlockForwardIterator iter;
    253       iter.init(&(marker.numeric_starts));
     281      iter.init(&(marker[i].starts));
    254282      BitBlockForwardIterator iter_end;
    255283      while(iter != iter_end) {
    256           Digit_postprocessing(src, *iter);
    257           iter++;
     284        int pos = i*BLOCK_SIZE + (*iter);
     285        pos = pos - del_sum[pos/16];
     286        if (cb[pos]== 0x2f){
     287          tokenArray[token_idx].type = name_token;
     288          tokenArray[token_idx].token.char_ptr = &cb[pos];
     289        }
     290        else if(pos>0 && cb[pos-1]== 0x28){
     291          tokenArray[token_idx].type = str_token;
     292          tokenArray[token_idx].token.char_ptr = &cb[pos];
     293        }
     294        else if (cb[pos]== 0x0c){
     295          tokenArray[token_idx].type = hex_token;
     296          tokenArray[token_idx].token.char_ptr = &cb[pos];
     297        }
     298        else if (cb[pos]== '-' || cb[pos]== '+' || (cb[pos]>='0'&&cb[pos]<='9')){
     299          Digit_postprocessing(cb, pos);
     300        }
     301        iter++;
     302        token_idx++;
    258303      }
    259     }
     304    }
     305  }
    260306}
    261307
     
    274320  struct Out_Callouts out_Callouts;
    275321
    276   struct Marker marker;
     322  struct Marker * marker = (struct Marker *)malloc(sizeof(struct Marker)*BLOCKS);
    277323
    278324  int block_base = 0;
     
    282328  char * content_buf = (char*)malloc(filesize);
    283329  char * content_buf_ptr =  content_buf;
     330  int del_sum[BLOCKS*8];
     331  del_sum[0] = 0;
    284332
    285333  parity.odd = simd<2>::constant<1>();
     
    302350    for (int i = 0; i < BLOCKS-1; i++){
    303351      parse_Escaped.do_block(lex[i], parity, escape_Callouts, out_Callouts, lex[i+1]);
    304       parse_Comment.do_block(lex[i], escape_Callouts, marker, out_Callouts);
    305       parse_String.do_block(escape_Callouts, marker, out_Callouts);
    306       parse_HexStrings.do_block(lex[i], marker, out_Callouts);
    307       parse_Names.do_block(lex[i], marker, out_Callouts, lex[i+1]);
    308       parse_Numeric.do_block(lex[i], marker, out_Callouts);
    309       prepare_content_buffer.do_block(basis_bits[i], lex[i], marker, parity, escape_Callouts, out_Callouts, lex[i+1]);
    310       Build_ContentBuffer(out_Callouts, content_buf, &content_buf_ptr);
    311       Postprocessing(&srcbuf[i*BLOCK_SIZE], marker);
     352      parse_Comment.do_block(lex[i], escape_Callouts, marker[i], out_Callouts);
     353      parse_String.do_block(escape_Callouts, marker[i], out_Callouts);
     354      parse_HexStrings.do_block(lex[i], marker[i], out_Callouts);
     355      parse_Names.do_block(lex[i], marker[i], out_Callouts, lex[i+1]);
     356      parse_Numeric.do_block(lex[i], marker[i], out_Callouts);
     357      prepare_content_buffer.do_block(basis_bits[i], lex[i], marker[i], parity, escape_Callouts, out_Callouts, lex[i+1]);
     358      Build_ContentBuffer(out_Callouts, marker[i], content_buf, &content_buf_ptr, del_sum, i);
    312359    }
    313360   
     
    315362    EOF_mask = bitblock::srl(simd<1>::constant<1>(), convert(BLOCK_SIZE-chars_avail));
    316363    parse_Escaped.do_final_block(lex[BLOCKS-1], parity, escape_Callouts, out_Callouts, EOF_mask);
    317     parse_Comment.do_final_block(lex[BLOCKS-1], escape_Callouts, marker, out_Callouts, EOF_mask);
    318     parse_String.do_final_block(escape_Callouts, marker, out_Callouts, EOF_mask);
    319     parse_HexStrings.do_final_block(lex[BLOCKS-1], marker, out_Callouts, EOF_mask);
    320     parse_Names.do_final_block(lex[BLOCKS-1], marker, out_Callouts, EOF_mask);
    321     parse_Numeric.do_block(lex[BLOCKS-1], marker, out_Callouts);
    322     prepare_content_buffer.do_final_block(basis_bits[BLOCKS-1], lex[BLOCKS-1], marker, parity, escape_Callouts, out_Callouts, EOF_mask);
     364    parse_Comment.do_final_block(lex[BLOCKS-1], escape_Callouts, marker[BLOCKS-1], out_Callouts, EOF_mask);
     365    parse_String.do_final_block(escape_Callouts, marker[BLOCKS-1], out_Callouts, EOF_mask);
     366    parse_HexStrings.do_final_block(lex[BLOCKS-1], marker[BLOCKS-1], out_Callouts, EOF_mask);
     367    parse_Names.do_final_block(lex[BLOCKS-1], marker[BLOCKS-1], out_Callouts, EOF_mask);
     368    parse_Numeric.do_final_block(lex[BLOCKS-1], marker[BLOCKS-1], out_Callouts, EOF_mask);
     369    prepare_content_buffer.do_final_block(basis_bits[BLOCKS-1], lex[BLOCKS-1], marker[BLOCKS-1], parity, escape_Callouts, out_Callouts, EOF_mask);
    323370    out_Callouts.delmask |= ~EOF_mask;         
    324     Build_ContentBuffer(out_Callouts, content_buf, &content_buf_ptr);
    325     Postprocessing(&srcbuf[(BLOCKS-1)*BLOCK_SIZE], marker);
     371    Build_ContentBuffer(out_Callouts, marker[BLOCKS-1], content_buf, &content_buf_ptr, del_sum, BLOCKS-1);
     372   
     373
     374    Postprocessing(content_buf, (content_buf_ptr-content_buf)/BLOCK_SIZE+1, marker, del_sum);
     375
    326376
    327377//      print_register("marker.error",marker.error);
    328    
     378//    
    329379//     for(int i=0;i<num_idx;i++)
    330380//       printf("%i,%i\n",numbers[i],dec_pl[i]);
     381   
     382//     for(int i=0; i<10;i++){
     383//       if(tokenArray[i].type==str_token)
     384//      printf("String: %s\n",tokenArray[i].token.char_ptr);
     385//   
     386//       if(tokenArray[i].type==hex_token)
     387//      printf("Hex: %s\n",tokenArray[i].token.char_ptr);
     388//       
     389//       if(tokenArray[i].type==name_token)
     390//      printf("Name: %s\n",tokenArray[i].token.char_ptr);
     391//       
     392//       if(tokenArray[i].type==int_token)
     393//      printf("Number: %i\n",numbers[tokenArray[i].token.idx]);
     394//       
     395//       if(tokenArray[i].type==flt_token)
     396//      printf("Number: %i\n",numbers[tokenArray[i].token.idx]);
     397//
     398//     }
     399     
    331400 
    332401    PERF_SEC_END(parser_timer, chars_read);
Note: See TracChangeset for help on using the changeset viewer.