Changeset 3129 for proto


Ignore:
Timestamp:
May 11, 2013, 4:20:58 PM (6 years ago)
Author:
lindanl
Message:

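Bug fixes. In cb_pablo.py: compute the Oct2/Oct3 octal-escape callouts unconditionally, start string markers at unmasked left parentheses (ends found with AdvanceThenScanThru), exclude masked positions from name escapes, and clear zeromask positions from marker.starts and marker.ends. In cb_template.cpp: replace the del_sum array with per-block partial sums (unitsums_per_reg), record operators in a separate optrArray, and copy the unfinished token from &cb[pos] into the next content buffer.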

Location:
proto/PDF
Files:
2 edited

Legend:

Unmodified
Added
Removed
  • proto/PDF/cb_pablo.py

    r3066 r3129  
    239239          escape_Callouts.Nonoct = lex.Nonoct & escaped
    240240          escape_Callouts.Oct1 = lex.Oct & escaped
    241           if escape_Callouts.Oct1:
    242                   escape_Callouts.Oct2 = escape_Callouts.Oct1 & pablo.Lookahead(lex.Oct)
    243                   escape_Callouts.Oct1 &= ~escape_Callouts.Oct2
    244                   escape_Callouts.Oct3 = escape_Callouts.Oct2 & pablo.Lookahead(lex.Oct, 2)
    245                   escape_Callouts.Oct2 &= ~escape_Callouts.Oct3
     241          escape_Callouts.Oct2 = escape_Callouts.Oct1 & pablo.Lookahead(lex.Oct)
     242          escape_Callouts.Oct1 &= ~escape_Callouts.Oct2
     243          escape_Callouts.Oct3 = escape_Callouts.Oct2 & pablo.Lookahead(lex.Oct, 2)
     244          escape_Callouts.Oct2 &= ~escape_Callouts.Oct3
    246245          out_Callouts.delmask = escape_mark
    247246       
     
    366365        # Any closing paren that was not actually used to close
    367366        # an opener is in error.
    368         marker.starts = pablo.Advance(~instring) & instring
    369         marker.ends = pablo.ScanThru(marker.starts, instring)
     367        marker.starts = escape_Callouts.LParen &~ out_Callouts.mask
     368        marker.ends = pablo.AdvanceThenScanThru(marker.starts, instring)
    370369        marker.error |= escape_Callouts.RParen &~ closed_RParen &~ pablo.SpanUpTo(comment_start, comment_end)
    371370
     
    388387        name_starts =lex.Slash &~ out_Callouts.mask
    389388        names_follows = pablo.ScanThru(pablo.Advance(name_starts), lex.Regular)
    390         out_Callouts.names_escapes = lex.Hash & pablo.Lookahead(lex.Hex) & pablo.Lookahead(lex.Hex,2)   
     389        out_Callouts.names_escapes = lex.Hash & pablo.Lookahead(lex.Hex) & pablo.Lookahead(lex.Hex,2) &~ out_Callouts.mask
    391390        out_Callouts.mask |= pablo.InclusiveSpan(name_starts,names_follows)
    392391        marker.starts |= name_starts
     
    555554    out_Callouts.bit_6 = out_Callouts.bit_6 &~ out_Callouts.zeromask
    556555    out_Callouts.bit_7 = out_Callouts.bit_7 &~ out_Callouts.zeromask
     556    marker.starts =  marker.starts &~ out_Callouts.zeromask
     557    marker.ends =  marker.ends &~ out_Callouts.zeromask
    557558   
    558559#
  • proto/PDF/cb_template.cpp

    r3066 r3129  
    7373} TokenStruct;
    7474
     75struct pdfOptr{
     76  char * char_ptr;
     77  int len;
     78  int token_pos;
     79};
     80
    7581TokenStruct tokenArray[1000000];
    7682int token_idx=0;
     83
     84struct pdfOptr optrArray[1000000];
     85int optr_idx=0;
    7786
    7887#define MAX_NUM 1000000
     
    200209}
    201210
    202 static inline void Build_ContentBuffer(Out_Callouts out_Callouts, Marker & marker, char ** content_buf_ptr, int * del_sum, int blk){
     211static inline void Build_ContentBuffer(Out_Callouts out_Callouts, Marker & marker, char ** content_buf_ptr, ubitblock * unitsums_per_reg, int blk){
    203212   
    204213    BytePack S[8];
     
    219228        }
    220229
    221         union {BitBlock i128; uint16_t i8[8];} units_per_reg;
     230        ubitblock units_per_reg;
    222231       
    223232        p2s(out_Callouts.bit_0,out_Callouts.bit_1,out_Callouts.bit_2,out_Callouts.bit_3,
     
    225234                          S[0], S[1],S[2] ,S[3] ,S[4] ,S[5] ,S[6] ,S[7]);
    226235         
    227         for(int k=0; k<8; k++) units_per_reg.i8[k] = 0;
    228         del_count(out_Callouts.delmask,units_per_reg.i128);
    229         for(int k=0; k<8; k++) {
    230             del_sum[blk*8+k+1] = del_sum[blk*8+k] + (16-units_per_reg.i8[k]);
    231         }
    232 
     236        for(int k=0; k<8; k++) units_per_reg._16[k] = 0;
     237        del_count(out_Callouts.delmask,units_per_reg._128);
     238       
     239        unitsums_per_reg[blk]._128 = PartialSum16(units_per_reg._128);
     240       
    233241        for(int j=0; j<8; j++){
    234242          bitblock::store_unaligned(S[j], (BytePack *) *content_buf_ptr);
    235           *content_buf_ptr += units_per_reg.i8[j];
    236         }
    237 }
    238 
    239 static inline void Postprocessing(char* cb, int cb_size, char* cb_new, Marker * marker, int * del_sum, int & at_start, int & base_pos){   
     243          *content_buf_ptr += units_per_reg._16[j];
     244        }
     245}
     246
     247static inline void Postprocessing(char* cb, int cb_size, char* cb_new, Marker * marker, ubitblock * unitsums_per_reg, int & at_start, int & at_optr, int & base_pos){   
    240248     
    241249  int pos = 0;
    242250 
    243   if(!at_start){
    244     if (tokenArray[token_idx].type == name_token ||
    245         tokenArray[token_idx].type == str_token ||
    246         tokenArray[token_idx].type == hex_token ||
    247         tokenArray[token_idx].type == kw_token){
    248       tokenArray[token_idx].token.char_ptr = cb;
     251  if(!at_start){
     252    if(at_optr){
     253      optrArray[optr_idx].char_ptr = cb;
    249254    }
    250255    else{
    251       num_idx--;
    252       Digit_postprocessing(cb, 0);
     256      if (tokenArray[token_idx].type == flt_token || tokenArray[token_idx].type == int_token){
     257        num_idx--;
     258        Digit_postprocessing(cb, 0);
     259      }
     260      else{
     261        tokenArray[token_idx].token.char_ptr = cb;
     262      }
    253263    }
    254264  }
    255 
    256265   
    257266  int block_pos = base_pos;
    258267  for(int i=0; i<BLOCKS; i++){
    259268    if(bitblock::any(simd_or(marker[i].starts,marker[i].ends))){
    260       BitBlockForwardIterator iter_start;
    261       BitBlockForwardIterator iter_end;
    262       iter_start.init(&(marker[i].starts));
    263       iter_end.init(&(marker[i].ends));
    264       BitBlockForwardIterator end;
    265       while(iter_start != end || iter_end != end) {
     269      BitBlockForwardIteratorWithBaseCounts16 iter_start;
     270      BitBlockForwardIteratorWithBaseCounts16 iter_end;
     271      iter_start.init(&(marker[i].starts), unitsums_per_reg[i]._128);
     272      iter_end.init(&(marker[i].ends), unitsums_per_reg[i]._128);
     273      BitBlockForwardIteratorWithBaseCounts16 end;
     274      while(*iter_start != -1 || *iter_end != -1) {
    266275        if(at_start){
    267276          pos = block_pos + (*iter_start);
    268           pos = pos - del_sum[i*8 + (*iter_start)/16];
    269277          if (cb[pos]== 0x2f){ 
    270278            tokenArray[token_idx].type = name_token;
    271279            tokenArray[token_idx].token.char_ptr = &cb[pos];
    272280          }
    273           else if(pos>0 && cb[pos-1]== 0x28){   
     281          else if(cb[pos]== 0x28){     
    274282            tokenArray[token_idx].type = str_token;
    275283            tokenArray[token_idx].token.char_ptr = &cb[pos];
     
    282290            Digit_postprocessing(cb, pos);
    283291          }
    284           else {
     292          else if ( (cb[pos]== 'n' && cb[pos+1]== 'u' && cb[pos+2]== 'l'  && cb[pos+3]== 'l')
     293            || (cb[pos]== 't' && cb[pos+1]== 'r' && cb[pos+2]== 'u'  && cb[pos+3]== 'e')
     294            || (cb[pos]== 'f' && cb[pos+1]== 'a' && cb[pos+2]== 'l'  && cb[pos+3]== 's' && cb[pos+3]== 'e')){
     295           
    285296            tokenArray[token_idx].type = kw_token;
    286297            tokenArray[token_idx].token.char_ptr = &cb[pos];   
    287298          }
     299          else{
     300            //operator
     301            optrArray[optr_idx].char_ptr = &cb[pos];
     302            optrArray[optr_idx].token_pos = token_idx-1;
     303            at_optr = 1;
     304          }
    288305          iter_start++;
    289306          at_start = 0;
    290307        }
    291         else{   
    292           tokenArray[token_idx].len = base_pos + i*BLOCK_SIZE + (*iter_end) - pos;
    293           iter_end++;
    294           token_idx++;   
     308        else{
     309          int len = block_pos + (*iter_end) - pos;     
     310          if(at_optr){
     311            optrArray[optr_idx].len = len;
     312            optr_idx++;
     313            at_optr = 0;
     314          }
     315          else{
     316            tokenArray[token_idx].len = len;
     317            token_idx++;
     318          }
     319          iter_end++;
    295320          at_start = 1;
    296321        }
    297322      }
    298     }
    299     block_pos += BLOCK_SIZE - del_sum[i*8+7];
     323    }   
     324
     325    block_pos += unitsums_per_reg[i]._16[7];
    300326  }
    301327  if(!at_start){
    302328    base_pos = cb_size - pos;
    303     if(base_pos<0){
    304       cout << "in markup " << cb_size << "," << pos << "," << base_pos << endl;
    305     }
    306     memcpy(cb_new, cb, base_pos);
     329//     if(base_pos<0){
     330//       cout << "in markup " << cb_size << "," << pos << "," << base_pos << endl;
     331//     }
     332    memcpy(cb_new, &cb[pos], base_pos);
    307333  }
    308334  else
    309335    base_pos = 0;
    310 //   cout << cb_size << "," << pos << "," << base_pos << endl; 
    311336}
    312337
     
    335360  char * content_buf_ptr =  content_buf[0];
    336361  int cb_idx = 0; 
    337   int del_sum[BLOCKS*8];
    338   del_sum[0] = 0;
     362  ubitblock unitsums_per_reg[BLOCKS];
    339363  int at_start = 1;
     364  int at_optr = 0;
    340365  int base_pos = 0;
    341366
     
    365390      parse_Escaped.do_block(lex[i], parity, escape_Callouts, out_Callouts, lex[i+1]);
    366391      match_Parens_With_Comments.do_block(lex[i], escape_Callouts, marker[i], out_Callouts);
    367 //       parse_Comment.do_block(lex[i], escape_Callouts, marker[i], out_Callouts);
    368 //       parse_String.do_block(escape_Callouts, marker[i], out_Callouts);
    369392      parse_HexStrings.do_block(lex[i], marker[i], out_Callouts);
    370393      parse_Names.do_block(lex[i], marker[i], out_Callouts, lex[i+1]);
     
    372395      parse_Keywords.do_block(lex[i], marker[i], out_Callouts);
    373396      prepare_content_buffer.do_block(basis_bits[i], lex[i], marker[i], parity, escape_Callouts, out_Callouts, lex[i+1]);
    374       Build_ContentBuffer(out_Callouts, marker[i], &content_buf_ptr, del_sum, i);
     397      Build_ContentBuffer(out_Callouts, marker[i], &content_buf_ptr, unitsums_per_reg, i);
    375398    }
    376399       
     
    380403    parse_Escaped.do_block(lex[BLOCKS-1], parity, escape_Callouts, out_Callouts, lex[0]);
    381404    match_Parens_With_Comments.do_block(lex[BLOCKS-1], escape_Callouts, marker[BLOCKS-1], out_Callouts);
    382 //     parse_Comment.do_block(lex[BLOCKS-1], escape_Callouts, marker[BLOCKS-1], out_Callouts);
    383 //     parse_String.do_block(escape_Callouts, marker[BLOCKS-1], out_Callouts);
    384405    parse_HexStrings.do_block(lex[BLOCKS-1], marker[BLOCKS-1], out_Callouts);
    385406    parse_Names.do_block(lex[BLOCKS-1], marker[BLOCKS-1], out_Callouts, lex[0]);
     
    387408    parse_Keywords.do_block(lex[BLOCKS-1], marker[BLOCKS-1], out_Callouts);
    388409    prepare_content_buffer.do_block(basis_bits[BLOCKS-1], lex[BLOCKS-1], marker[BLOCKS-1], parity, escape_Callouts, out_Callouts, lex[0]);
    389     Build_ContentBuffer(out_Callouts, marker[BLOCKS-1], &content_buf_ptr, del_sum, BLOCKS-1);
    390    
     410    Build_ContentBuffer(out_Callouts, marker[BLOCKS-1], &content_buf_ptr, unitsums_per_reg, BLOCKS-1);
     411
    391412    content_buf[cb_idx+1] = (char*)malloc(BUF_SIZE*2);
    392     Postprocessing(content_buf[cb_idx], content_buf_ptr-content_buf[cb_idx], content_buf[cb_idx+1], marker, del_sum, at_start, base_pos);
    393      fwrite(content_buf[cb_idx], 1, content_buf_ptr-content_buf[cb_idx], outfile);
     413    Postprocessing(content_buf[cb_idx], content_buf_ptr-content_buf[cb_idx], content_buf[cb_idx+1], marker, unitsums_per_reg, at_start, at_optr, base_pos);
    394414    content_buf_ptr =  content_buf[cb_idx+1]+base_pos;
    395     cb_idx++;     
    396    
     415    cb_idx++; 
    397416   
    398417  }
     
    400419  //partial buffer
    401420  else{
    402 //     cout << "final buffer" << endl;
     421//      cout << "final buffer" << endl;
    403422    BLOCKS = chars_read/BLOCK_SIZE + 1;   
    404423 
     
    411430      parse_Escaped.do_block(lex[i], parity, escape_Callouts, out_Callouts, lex[i+1]);
    412431      match_Parens_With_Comments.do_block(lex[i], escape_Callouts, marker[i], out_Callouts);
    413 //       parse_Comment.do_block(lex[i], escape_Callouts, marker[i], out_Callouts);
    414 //       parse_String.do_block(escape_Callouts, marker[i], out_Callouts);
    415432      parse_HexStrings.do_block(lex[i], marker[i], out_Callouts);
    416433      parse_Names.do_block(lex[i], marker[i], out_Callouts, lex[i+1]);
     
    418435      parse_Keywords.do_block(lex[i], marker[i], out_Callouts);
    419436      prepare_content_buffer.do_block(basis_bits[i], lex[i], marker[i], parity, escape_Callouts, out_Callouts, lex[i+1]);
    420       Build_ContentBuffer(out_Callouts, marker[i], &content_buf_ptr, del_sum, i);
     437      Build_ContentBuffer(out_Callouts, marker[i], &content_buf_ptr, unitsums_per_reg, i);
    421438      chars_avail -= BLOCK_SIZE;
    422439    }
     
    427444    parse_Escaped.do_final_block(lex[BLOCKS-1], parity, escape_Callouts, out_Callouts, EOF_mask);
    428445    match_Parens_With_Comments.do_final_block(lex[BLOCKS-1], escape_Callouts, marker[BLOCKS-1], out_Callouts, EOF_mask);
    429 //     parse_Comment.do_final_block(lex[BLOCKS-1], escape_Callouts, marker[BLOCKS-1], out_Callouts, EOF_mask);
    430 //     parse_String.do_final_block(escape_Callouts, marker[BLOCKS-1], out_Callouts, EOF_mask);
    431446    parse_HexStrings.do_final_block(lex[BLOCKS-1], marker[BLOCKS-1], out_Callouts, EOF_mask);
    432447    parse_Names.do_final_block(lex[BLOCKS-1], marker[BLOCKS-1], out_Callouts, EOF_mask);
     
    435450    prepare_content_buffer.do_final_block(basis_bits[BLOCKS-1], lex[BLOCKS-1], marker[BLOCKS-1], parity, escape_Callouts, out_Callouts, EOF_mask);
    436451    out_Callouts.delmask |= ~EOF_mask;         
    437     Build_ContentBuffer(out_Callouts, marker[BLOCKS-1], &content_buf_ptr, del_sum, BLOCKS-1);
     452    Build_ContentBuffer(out_Callouts, marker[BLOCKS-1], &content_buf_ptr, unitsums_per_reg, BLOCKS-1);
    438453    content_buf[cb_idx+1] = (char*)malloc(BUF_SIZE*2);
    439     Postprocessing(content_buf[cb_idx], content_buf_ptr-content_buf[cb_idx], content_buf[cb_idx+1], marker, del_sum, at_start, base_pos); 
     454    Postprocessing(content_buf[cb_idx], content_buf_ptr-content_buf[cb_idx], content_buf[cb_idx+1], marker, unitsums_per_reg, at_start, at_optr, base_pos); 
    440455    break;
    441456  }
    442457}
    443 
    444 //      print_register("marker.error",marker.error);
    445 //    
    446 //     for(int i=0;i<num_idx;i++)
    447 //       printf("%i,%i\n",numbers[i],dec_pl[i]);
     458/*
     459    print_register("marker.error",marker.error);
     460   
     461    for(int i=0;i<num_idx;i++)
     462      printf("%i,%i\n",numbers[i],dec_pl[i]);*/
    448463/*   
    449464    for(int i=0; i<100;i++){
     
    476491          printf("%c",tokenArray[i].token.char_ptr[j]);
    477492        printf("\n");
    478       }*/
    479 
    480     }
    481      
     493      }
     494    }
     495   
     496
     497    for(int i=0;i<100;i++)  {
     498        printf("len at %i is %i\n",i, optrArray[i].len);
     499        for(int j=0; j<optrArray[i].len; j++)
     500          printf("%c",optrArray[i].char_ptr[j]);
     501        printf("\n");
     502    }*/
    482503
    483504}
Note: See TracChangeset for help on using the changeset viewer.