Changeset 2957


Ignore:
Timestamp:
Mar 20, 2013, 5:54:01 PM (5 years ago)
Author:
lindanl
Message:

Read the input one segment at a time (BUF_SIZE bytes per read) instead of loading the whole file into memory at once

File:
1 edited

Legend:

Unmodified
Added
Removed
  • proto/PDF/cb_template.cpp

    r2934 r2957  
    2020#include <iostream>
    2121#include <vector>
     22
     23int BLOCKS = 12;
     24#define BUF_SIZE BLOCK_SIZE * BLOCKS
     25
    2226
    2327#ifdef BUFFER_PROFILING
     
    112116}
    113117
    114 int main(int argc, char * argv[]) {
    115 
    116         char * infilename, * outfilename;
    117         FILE *infile, *outfile;
    118         struct stat fileinfo;
    119 
    120         if (argc < 2) {
    121                 printf("Usage: %s <filename> [<outputfile>]\n", argv[0]);
    122                 exit(-1);
    123         }
    124 
    125         infilename = argv[1];
    126         stat(infilename, &fileinfo);
    127         infile = fopen(infilename, "rb");
    128         if (!infile) {
    129                 fprintf(stderr, "Error: cannot open %s for input.\n", infilename);
    130                 exit(-1);
    131         }
    132 
    133         if (argc < 3) outfile = stdout;
    134         else {
    135                 outfilename = argv[2];
    136                 outfile = fopen(outfilename, "wb");
    137                 if (!outfile) {
    138                         fprintf(stderr, "Error: cannot open %s for writing.\n", outfilename);
    139                         exit(-1);
    140                 }
    141         }
    142 
    143 //      PERF_SEC_BIND(1);
    144 
    145         PERF_SEC_INIT(s2p_lex_timer);
    146         PERF_SEC_INIT(parser_timer);
    147         PERF_SEC_INIT(postprocess_timer);
    148 
    149         do_process(infile, outfile, fileinfo.st_size+1);
    150 
    151         PERF_SEC_DUMP(s2p_lex_timer);
    152         PERF_SEC_DUMP(parser_timer);
    153         PERF_SEC_DUMP(postprocess_timer);
    154 
    155         PERF_SEC_DESTROY(s2p_lex_timer);
    156         PERF_SEC_DESTROY(parser_timer);
    157         PERF_SEC_DESTROY(postprocess_timer);
    158 
    159         fclose(infile);
    160         fclose(outfile);
    161 
    162         return(0);
    163 }
    164 
    165118/* s2p Definitions */
    166119static inline void s2p_do_block(BytePack U8[], Basis_bits & basis_bits) {
     
    247200}
    248201
    249 static inline void Build_ContentBuffer(Out_Callouts out_Callouts, Marker & marker, char * content_buf, char ** content_buf_ptr, int * del_sum, int blk){
     202static inline void Build_ContentBuffer(Out_Callouts out_Callouts, Marker & marker, char ** content_buf_ptr, int * del_sum, int blk){
    250203   
    251204    BytePack S[8];
     
    284237}
    285238
    286 static inline void Postprocessing(char* cb, int cb_blocks, Marker * marker, int * del_sum){   
     239static inline void Postprocessing(char* cb, int cb_size, char* cb_new, Marker * marker, int * del_sum, int & at_start, int & base_pos){   
    287240     
    288241  int pos = 0;
    289   int at_start = 1;
    290   for(int i=0; i<cb_blocks; i++){
    291     if(bitblock::any(marker[i].starts)){
     242 
     243  if(!at_start){
     244    if (tokenArray[token_idx].type == name_token ||
     245        tokenArray[token_idx].type == str_token ||
     246        tokenArray[token_idx].type == hex_token ||
     247        tokenArray[token_idx].type == kw_token){
     248      tokenArray[token_idx].token.char_ptr = cb;
     249    }
     250    else{
     251      num_idx--;
     252      Digit_postprocessing(cb, 0);
     253    }
     254  }
     255
     256   
     257  int block_pos = base_pos;
     258  for(int i=0; i<BLOCKS; i++){
     259    if(bitblock::any(simd_or(marker[i].starts,marker[i].ends))){
    292260      BitBlockForwardIterator iter_start;
    293261      BitBlockForwardIterator iter_end;
     
    297265      while(iter_start != end || iter_end != end) {
    298266        if(at_start){
    299           pos = i*BLOCK_SIZE + (*iter_start);
    300           pos = pos - del_sum[pos/16];
    301           if (cb[pos]== 0x2f){
     267          pos = block_pos + (*iter_start);
     268          pos = pos - del_sum[i*8 + (*iter_start)/16];
     269          if (cb[pos]== 0x2f){ 
    302270            tokenArray[token_idx].type = name_token;
    303271            tokenArray[token_idx].token.char_ptr = &cb[pos];
    304272          }
    305           else if(pos>0 && cb[pos-1]== 0x28){
     273          else if(pos>0 && cb[pos-1]== 0x28){   
    306274            tokenArray[token_idx].type = str_token;
    307275            tokenArray[token_idx].token.char_ptr = &cb[pos];
    308276          }
    309           else if (cb[pos]== 0x0c){
     277          else if (cb[pos]== 0x0c){     
    310278            tokenArray[token_idx].type = hex_token;
    311279            tokenArray[token_idx].token.char_ptr = &cb[pos];
     
    316284          else {
    317285            tokenArray[token_idx].type = kw_token;
    318             tokenArray[token_idx].token.char_ptr = &cb[pos];     
     286            tokenArray[token_idx].token.char_ptr = &cb[pos];   
    319287          }
    320288          iter_start++;
    321289          at_start = 0;
    322290        }
    323         else{    
    324           tokenArray[token_idx].len = i*BLOCK_SIZE + (*iter_end) - pos;
     291        else{   
     292          tokenArray[token_idx].len = base_pos + i*BLOCK_SIZE + (*iter_end) - pos;
    325293          iter_end++;
    326294          token_idx++;   
     
    329297      }
    330298    }
     299    block_pos += BLOCK_SIZE - del_sum[i*8+7];
    331300  }
    332 }
     301  if(!at_start){
     302    base_pos = cb_size - pos;
     303    if(base_pos<0){
     304      cout << "in markup " << cb_size << "," << pos << "," << base_pos << endl;
     305    }
     306    memcpy(cb_new, cb, base_pos);
     307  }
     308  else
     309    base_pos = 0;
     310//   cout << cb_size << "," << pos << "," << base_pos << endl; 
     311}
     312
     313#define MAX_CB  10000
    333314
    334315void do_process(FILE *infile, FILE *outfile, int filesize) {
    335316 
    336   int BLOCKS = filesize/BLOCK_SIZE+1;
    337 
    338317  struct Basis_bits * basis_bits = (struct Basis_bits *)malloc(sizeof(struct Basis_bits)*BLOCKS);
    339318
    340 #ifndef TWOBLOCK_LEX
    341319  struct Lex * lex = (struct Lex *)malloc(sizeof(struct Lex)*BLOCKS);
    342 #endif
    343 #ifdef TWOBLOCK_LEX 
    344   struct Lex lex0;
    345   struct Lex lex1;
    346 #endif
    347320
    348321  struct Parity parity;
     
    357330  int chars_read = 0;
    358331  int chars_avail = 0;
    359   char * srcbuf = (char*)malloc(filesize);
    360   char * content_buf = (char*)malloc(filesize);
    361   char * content_buf_ptr =  content_buf;
     332  char * srcbuf = (char*)malloc(BUF_SIZE);
     333  char * content_buf[MAX_CB];
     334  content_buf[0] = (char*)malloc(BUF_SIZE*2);
     335  char * content_buf_ptr =  content_buf[0];
     336  int cb_idx = 0; 
    362337  int del_sum[BLOCKS*8];
    363338  del_sum[0] = 0;
     339  int at_start = 1;
     340  int base_pos = 0;
    364341
    365342  parity.odd = simd<2>::constant<1>();
    366343  parity.even = simd<2>::constant<2>();
    367  
    368 
    369   chars_read = fread((void *)srcbuf, 1, filesize, infile);
    370   chars_avail = chars_read;
    371344
    372345@stream_stmts
    373346
    374 
    375     PERF_SEC_START(s2p_lex_timer);
     347  chars_read = fread((void *)srcbuf, 1, BUF_SIZE, infile);
     348  chars_avail = chars_read;
     349 
     350 
     351 
     352  s2p_do_block((BytePack *) &srcbuf[0], basis_bits[0]);
     353  classify_bytes.do_block(basis_bits[0], lex[0]);
     354
     355while(chars_read){
     356  // full buffer
     357  if (chars_read == BUF_SIZE){
    376358   
    377     for (int i = 0; i < BLOCKS; i++){
     359    for (int i = 1; i < BLOCKS; i++){
    378360      s2p_do_block((BytePack *) &srcbuf[i*BLOCK_SIZE], basis_bits[i]);
    379     }
    380 #ifndef TWOBLOCK_LEX
    381     for (int i = 0; i < BLOCKS; i++){
    382361      classify_bytes.do_block(basis_bits[i], lex[i]);
    383362    }
    384 #endif
    385 #ifdef TWOBLOCK_LEX 
    386     classify_bytes.do_block(basis_bits[0], lex0);
    387 #endif
    388    
    389     PERF_SEC_END(s2p_lex_timer, chars_read);
    390     PERF_SEC_START(parser_timer);
    391 
    392363
    393364    for (int i = 0; i < BLOCKS-1; i++){
    394 #ifdef TWOBLOCK_LEX 
    395       classify_bytes.do_block(basis_bits[i+1], lex1);
    396       parse_Escaped.do_block(lex0, parity, escape_Callouts, out_Callouts, lex1);
    397       parse_Comment.do_block(lex0, escape_Callouts, marker[i], out_Callouts);
    398       parse_String.do_block(escape_Callouts, marker[i], out_Callouts);
    399       parse_HexStrings.do_block(lex0, marker[i], out_Callouts);
    400       parse_Names.do_block(lex0, marker[i], out_Callouts, lex1);
    401       parse_Numeric.do_block(lex0, marker[i], out_Callouts);
    402       parse_Keywords.do_block(lex0, marker[i], out_Callouts);
    403       prepare_content_buffer.do_block(basis_bits[i], lex0, marker[i], parity, escape_Callouts, out_Callouts, lex1);
    404       Build_ContentBuffer(out_Callouts, marker[i], content_buf, &content_buf_ptr, del_sum, i);
    405       lex0 = lex1;
    406 #endif
    407 #ifndef TWOBLOCK_LEX
    408365      parse_Escaped.do_block(lex[i], parity, escape_Callouts, out_Callouts, lex[i+1]);
    409366      parse_Comment.do_block(lex[i], escape_Callouts, marker[i], out_Callouts);
     
    414371      parse_Keywords.do_block(lex[i], marker[i], out_Callouts);
    415372      prepare_content_buffer.do_block(basis_bits[i], lex[i], marker[i], parity, escape_Callouts, out_Callouts, lex[i+1]);
    416       Build_ContentBuffer(out_Callouts, marker[i], content_buf, &content_buf_ptr, del_sum, i);
    417 #endif
    418     }
     373      Build_ContentBuffer(out_Callouts, marker[i], &content_buf_ptr, del_sum, i);
     374    }
     375       
     376    chars_read = fread((void *)srcbuf, 1, BUF_SIZE, infile);
     377    s2p_do_block((BytePack *) &srcbuf[0], basis_bits[0]);
     378    classify_bytes.do_block(basis_bits[0], lex[0]);   
     379    parse_Escaped.do_block(lex[BLOCKS-1], parity, escape_Callouts, out_Callouts, lex[0]);
     380    parse_Comment.do_block(lex[BLOCKS-1], escape_Callouts, marker[BLOCKS-1], out_Callouts);
     381    parse_String.do_block(escape_Callouts, marker[BLOCKS-1], out_Callouts);
     382    parse_HexStrings.do_block(lex[BLOCKS-1], marker[BLOCKS-1], out_Callouts);
     383    parse_Names.do_block(lex[BLOCKS-1], marker[BLOCKS-1], out_Callouts, lex[0]);
     384    parse_Numeric.do_block(lex[BLOCKS-1], marker[BLOCKS-1], out_Callouts);
     385    parse_Keywords.do_block(lex[BLOCKS-1], marker[BLOCKS-1], out_Callouts);
     386    prepare_content_buffer.do_block(basis_bits[BLOCKS-1], lex[BLOCKS-1], marker[BLOCKS-1], parity, escape_Callouts, out_Callouts, lex[0]);
     387    Build_ContentBuffer(out_Callouts, marker[BLOCKS-1], &content_buf_ptr, del_sum, BLOCKS-1);
    419388   
     389    content_buf[cb_idx+1] = (char*)malloc(BUF_SIZE*2);
     390    Postprocessing(content_buf[cb_idx], content_buf_ptr-content_buf[cb_idx], content_buf[cb_idx+1], marker, del_sum, at_start, base_pos);
     391     fwrite(content_buf[cb_idx], 1, content_buf_ptr-content_buf[cb_idx], outfile);
     392    content_buf_ptr =  content_buf[cb_idx+1]+base_pos;
     393    cb_idx++;     
     394   
     395   
     396  }
     397 
     398  //partial buffer
     399  else{
     400//     cout << "final buffer" << endl;
     401    BLOCKS = chars_read/BLOCK_SIZE + 1;   
     402 
     403    for (int i = 1; i < BLOCKS; i++){
     404      s2p_do_block((BytePack *) &srcbuf[i*BLOCK_SIZE], basis_bits[i]);
     405      classify_bytes.do_block(basis_bits[i], lex[i]);
     406    }
     407   
     408    for (int i = 0; i < BLOCKS-1; i++){
     409      parse_Escaped.do_block(lex[i], parity, escape_Callouts, out_Callouts, lex[i+1]);
     410      parse_Comment.do_block(lex[i], escape_Callouts, marker[i], out_Callouts);
     411      parse_String.do_block(escape_Callouts, marker[i], out_Callouts);
     412      parse_HexStrings.do_block(lex[i], marker[i], out_Callouts);
     413      parse_Names.do_block(lex[i], marker[i], out_Callouts, lex[i+1]);
     414      parse_Numeric.do_block(lex[i], marker[i], out_Callouts);
     415      parse_Keywords.do_block(lex[i], marker[i], out_Callouts);
     416      prepare_content_buffer.do_block(basis_bits[i], lex[i], marker[i], parity, escape_Callouts, out_Callouts, lex[i+1]);
     417      Build_ContentBuffer(out_Callouts, marker[i], &content_buf_ptr, del_sum, i);
     418      chars_avail -= BLOCK_SIZE;
     419    }
     420     
    420421    /*final block*/
     422//      cout << "final block" << endl;
    421423    EOF_mask = bitblock::srl(simd<1>::constant<1>(), convert(BLOCK_SIZE-chars_avail));
    422 #ifdef TWOBLOCK_LEX
    423     parse_Escaped.do_final_block(lex0, parity, escape_Callouts, out_Callouts, EOF_mask);
    424     parse_Comment.do_final_block(lex0, escape_Callouts, marker[BLOCKS-1], out_Callouts, EOF_mask);
    425     parse_String.do_final_block(escape_Callouts, marker[BLOCKS-1], out_Callouts, EOF_mask);
    426     parse_HexStrings.do_final_block(lex0, marker[BLOCKS-1], out_Callouts, EOF_mask);
    427     parse_Names.do_final_block(lex0, marker[BLOCKS-1], out_Callouts, EOF_mask);
    428     parse_Numeric.do_final_block(lex0, marker[BLOCKS-1], out_Callouts, EOF_mask);
    429     parse_Keywords.do_final_block(lex0, marker[BLOCKS-1], out_Callouts, EOF_mask);
    430     prepare_content_buffer.do_final_block(basis_bits[BLOCKS-1], lex0, marker[BLOCKS-1], parity, escape_Callouts, out_Callouts, EOF_mask);
    431     out_Callouts.delmask |= ~EOF_mask;         
    432     Build_ContentBuffer(out_Callouts, marker[BLOCKS-1], content_buf, &content_buf_ptr, del_sum, BLOCKS-1);
    433 #endif
    434 #ifndef TWOBLOCK_LEX
    435424    parse_Escaped.do_final_block(lex[BLOCKS-1], parity, escape_Callouts, out_Callouts, EOF_mask);
    436425    parse_Comment.do_final_block(lex[BLOCKS-1], escape_Callouts, marker[BLOCKS-1], out_Callouts, EOF_mask);
     
    442431    prepare_content_buffer.do_final_block(basis_bits[BLOCKS-1], lex[BLOCKS-1], marker[BLOCKS-1], parity, escape_Callouts, out_Callouts, EOF_mask);
    443432    out_Callouts.delmask |= ~EOF_mask;         
    444     Build_ContentBuffer(out_Callouts, marker[BLOCKS-1], content_buf, &content_buf_ptr, del_sum, BLOCKS-1);
    445 #endif
    446    
    447     PERF_SEC_END(parser_timer, chars_read);
    448     PERF_SEC_START(postprocess_timer);
    449 
    450     Postprocessing(content_buf, (content_buf_ptr-content_buf)/BLOCK_SIZE+1, marker, del_sum);
    451 
     433    Build_ContentBuffer(out_Callouts, marker[BLOCKS-1], &content_buf_ptr, del_sum, BLOCKS-1);
     434    content_buf[cb_idx+1] = (char*)malloc(BUF_SIZE*2);
     435    Postprocessing(content_buf[cb_idx], content_buf_ptr-content_buf[cb_idx], content_buf[cb_idx+1], marker, del_sum, at_start, base_pos); 
     436    break;
     437  }
     438}
    452439
    453440//      print_register("marker.error",marker.error);
     
    455442//     for(int i=0;i<num_idx;i++)
    456443//       printf("%i,%i\n",numbers[i],dec_pl[i]);
    457    
     444/*   
    458445    for(int i=0; i<10;i++){
    459 //         cout << tokenArray[i].len<<endl;
    460446      if(tokenArray[i].type==str_token){
    461447        printf("String: ");
     
    488474      }
    489475
    490     }
     476    }*/
    491477     
    492  
    493     PERF_SEC_END(postprocess_timer, chars_read);
    494    
    495     fwrite(content_buf, 1, content_buf_ptr-content_buf, outfile);
    496 
    497 }
    498 
     478
     479}
     480
     481
     482int main(int argc, char * argv[]) {
     483
     484        char * infilename, * outfilename;
     485        FILE *infile, *outfile;
     486        struct stat fileinfo;
     487
     488        if (argc < 2) {
     489                printf("Usage: %s <filename> [<outputfile>]\n", argv[0]);
     490                exit(-1);
     491        }
     492
     493        infilename = argv[1];
     494        stat(infilename, &fileinfo);
     495        infile = fopen(infilename, "rb");
     496        if (!infile) {
     497                fprintf(stderr, "Error: cannot open %s for input.\n", infilename);
     498                exit(-1);
     499        }
     500
     501        if (argc < 3) outfile = stdout;
     502        else {
     503                outfilename = argv[2];
     504                outfile = fopen(outfilename, "wb");
     505                if (!outfile) {
     506                        fprintf(stderr, "Error: cannot open %s for writing.\n", outfilename);
     507                        exit(-1);
     508                }
     509        }
     510
     511//      PERF_SEC_BIND(1);
     512
     513        PERF_SEC_INIT(s2p_lex_timer);
     514        PERF_SEC_INIT(parser_timer);
     515        PERF_SEC_INIT(postprocess_timer);
     516
     517        do_process(infile, outfile, fileinfo.st_size+1);
     518
     519        PERF_SEC_DUMP(s2p_lex_timer);
     520        PERF_SEC_DUMP(parser_timer);
     521        PERF_SEC_DUMP(postprocess_timer);
     522
     523        PERF_SEC_DESTROY(s2p_lex_timer);
     524        PERF_SEC_DESTROY(parser_timer);
     525        PERF_SEC_DESTROY(postprocess_timer);
     526
     527        fclose(infile);
     528        fclose(outfile);
     529
     530        return(0);
     531}
Note: See TracChangeset for help on using the changeset viewer.