Changeset 3155


Ignore:
Timestamp:
May 18, 2013, 3:14:22 PM (6 years ago)
Author:
lindanl
Message:

Add array parsing.

Location:
proto/PDF
Files:
2 edited

Legend:

Unmodified
Added
Removed
  • proto/PDF/cb_pablo.py

    r3129 r3155  
    244244          escape_Callouts.Oct2 &= ~escape_Callouts.Oct3
    245245          out_Callouts.delmask = escape_mark
    246        
    247 def Parse_Comment(lex, escape_Callouts, marker, out_Callouts):
    248   out_Callouts.mask = 0
    249   if lex.Percent:
    250         knownParen = 0
    251         CtCand = lex.Percent
    252         line_start = ~pablo.Advance(~lex.EOL)
    253         knownNonCtReg = pablo.SpanUpTo(line_start, pablo.ScanTo(line_start, lex.EOL | CtCand))
    254         newParen = knownNonCtReg & (escape_Callouts.LParen | escape_Callouts.RParen)
    255         while newParen &~ knownParen:
    256           knownParen |= newParen
    257           pscan = pablo.AdvanceThenScanTo(newParen & escape_Callouts.LParen, escape_Callouts.LParen | escape_Callouts.RParen)
    258           pscanReg = pablo.SpanUpTo(newParen & escape_Callouts.LParen, pscan)
    259           CtCand = CtCand &~ pscanReg
    260           knownNonCtReg = pablo.SpanUpTo(line_start, pablo.ScanTo(line_start, lex.EOL | CtCand))
    261           closed = pscan & escape_Callouts.RParen
    262           opener = pscan & escape_Callouts.LParen
    263           while pablo.inFile(opener):
    264             pscan = pablo.AdvanceThenScanTo(opener, escape_Callouts.RParen &~ closed)
    265             pscanReg = pablo.SpanUpTo(opener, pscan)
    266             CtCand = CtCand &~ pscanReg
    267             closed = pscan & escape_Callouts.RParen
    268             opener = pscan & escape_Callouts.LParen
    269             knownNonCtReg |= pscanReg
    270           newParen = knownNonCtReg & (escape_Callouts.LParen | escape_Callouts.RParen)
    271         comment = CtCand &~ knownNonCtReg
    272         out_Callouts.mask = pablo.InclusiveSpan(comment, pablo.ScanTo(comment, lex.EOL))
    273 
    274 def Parse_String(escape_Callouts, marker, out_Callouts):
    275         unmatched = escape_Callouts.RParen
    276         marker.error = 0
    277         pscan = 0
    278         qscan = 0
    279         pscan = pablo.ScanTo(pablo.Advance(escape_Callouts.LParen), escape_Callouts.LParen | escape_Callouts.RParen)
    280         qscan = pablo.ScanTo(pablo.Advance(escape_Callouts.RParen), escape_Callouts.LParen | escape_Callouts.RParen)
    281         instring = pablo.ExclusiveSpan(escape_Callouts.LParen, pscan)
    282         closed = pscan & escape_Callouts.RParen
    283         unclosed = pscan & escape_Callouts.LParen | qscan & escape_Callouts.RParen
    284         marker.error = pablo.atEOF(pscan)
    285         all_closed = closed
    286         while unclosed:
    287                 pscan = pablo.ScanTo(pablo.Advance(unclosed & escape_Callouts.LParen), unclosed)
    288                 qscan = pablo.ScanTo(pablo.Advance(unclosed & escape_Callouts.RParen), unclosed)
    289                 instring |= pablo.SpanUpTo(unclosed & escape_Callouts.LParen, pscan)
    290                 closed = pscan & escape_Callouts.RParen
    291                 unclosed = pscan & escape_Callouts.LParen | qscan & escape_Callouts.RParen
    292                 all_closed |= closed
    293                 marker.error |= pablo.atEOF(pscan)
    294         #
    295         # Any closing paren that was not actually used to close
    296         # an opener is in error.
    297         out_Callouts.mask |= instring
    298         marker.starts = pablo.Advance(~instring)&instring
    299         marker.ends = pablo.ScanThru(marker.starts, instring)
    300         marker.error |= escape_Callouts.RParen &~ all_closed       
    301        
    302 
    303 #
     246
    304247# Modified version with comment processing
    305248#
     
    369312        marker.error |= escape_Callouts.RParen &~ closed_RParen &~ pablo.SpanUpTo(comment_start, comment_end)
    370313
    371 
    372314         
    373315def Parse_HexStrings(lex, marker, out_Callouts) :
     
    388330        names_follows = pablo.ScanThru(pablo.Advance(name_starts), lex.Regular)
    389331        out_Callouts.names_escapes = lex.Hash & pablo.Lookahead(lex.Hex) & pablo.Lookahead(lex.Hex,2) &~ out_Callouts.mask
    390         out_Callouts.mask |= pablo.InclusiveSpan(name_starts,names_follows)
     332        out_Callouts.mask |= pablo.SpanUpTo(name_starts,names_follows)
    391333        marker.starts |= name_starts
    392334        marker.ends |= names_follows
     
    399341        marker.ends |= numeric_follows
    400342
    401        
     343# Parse everything else: keywords, operators, array brackets   
    402344def Parse_Keywords(lex, marker, out_Callouts) :
    403345        keywords_starts = (lex.Regular &~ pablo.Advance(lex.Regular)) &~ out_Callouts.mask
    404346        keywords_follows = pablo.ScanThru(keywords_starts, lex.Regular)
    405         marker.starts |= keywords_starts       
     347        marker.starts |= keywords_starts | ((lex.LBracket | lex.RBracket)&~ out_Callouts.mask)
    406348        marker.ends |= keywords_follows
     349       
    407350       
    408351       
  • proto/PDF/cb_template.cpp

    r3145 r3155  
    6363  int_token,
    6464  flt_token,
    65   kw_token
     65  kw_token,
     66  array_token
    6667};
    6768
     
    7879  int len;
    7980  int token_pos;
     81};
     82
     83struct buf_State{
     84  int at_start;
     85  int at_optr;
     86  int base_pos;
     87  int array_depth;
     88  int array_start_tokenidx; 
    8089};
    8190
     
    246255        }
    247256}
    248 /*
    249 static inline void Postprocessing(char* cb, int cb_size, char* cb_new, BitBlock * marker_starts, BitBlock * marker_ends, ubitblock * unitsums_per_reg, int & at_start, int & at_optr, int & base_pos){   
     257
     258static inline void Postprocessing(char* cb, int cb_size, char* cb_new, BitBlock * marker_starts, BitBlock * marker_ends, ubitblock * unitsums_per_reg, struct buf_State & buf_state){   
    250259     
    251260  int pos = 0;
    252  
    253   if(!at_start){
    254     if(at_optr){
     261  int len = 0;
     262 
     263  if(!buf_state.at_start){
     264    if(buf_state.at_optr){
    255265      optrArray[optr_idx].char_ptr = cb;
    256266    }
     
    266276  }
    267277   
    268   int block_pos = base_pos;
    269   for(int i=0; i<BLOCKS; i++){
    270     if(bitblock::any(simd_or(marker_starts[i],marker_ends[i]))){
    271       ForwardScannerWithBaseCounts16<BitBlock, ScanWord> iter_start(&(marker_starts[i]), unitsums_per_reg[i]._128);
    272       ForwardScannerWithBaseCounts16<BitBlock, ScanWord> iter_end(&(marker_ends[i]), unitsums_per_reg[i]._128);
    273       iter_start.scan_to_next();
    274       iter_end.scan_to_next();
    275       while(!iter_start.is_done() || !iter_end.is_done()) {
    276         if(at_start){
    277           pos = block_pos + (iter_start.get_pos());
    278           if (cb[pos]== 0x2f){ 
    279             tokenArray[token_idx].type = name_token;
    280             tokenArray[token_idx].token.char_ptr = &cb[pos];
    281           }
    282           else if(cb[pos]== 0x28){     
    283             tokenArray[token_idx].type = str_token;
    284             tokenArray[token_idx].token.char_ptr = &cb[pos];
    285           }
    286           else if (cb[pos]== 0x0c){     
    287             tokenArray[token_idx].type = hex_token;
    288             tokenArray[token_idx].token.char_ptr = &cb[pos];
    289           }
    290           else if (cb[pos]== '-' || cb[pos]== '+' || (cb[pos]>='0'&&cb[pos]<='9')){
    291             Digit_postprocessing(cb, pos);
    292           }
    293           else if ( (cb[pos]== 'n' && cb[pos+1]== 'u' && cb[pos+2]== 'l'  && cb[pos+3]== 'l')
    294             || (cb[pos]== 't' && cb[pos+1]== 'r' && cb[pos+2]== 'u'  && cb[pos+3]== 'e')
    295             || (cb[pos]== 'f' && cb[pos+1]== 'a' && cb[pos+2]== 'l'  && cb[pos+3]== 's' && cb[pos+3]== 'e')){
    296            
    297             tokenArray[token_idx].type = kw_token;
    298             tokenArray[token_idx].token.char_ptr = &cb[pos];   
    299           }
    300           else{
    301             //operator
    302             optrArray[optr_idx].char_ptr = &cb[pos];
    303             optrArray[optr_idx].token_pos = token_idx-1;
    304             at_optr = 1;
    305           }
    306           iter_start.scan_to_next();
    307           at_start = 0;
    308         }
    309         else{
    310           int len = block_pos + (iter_end.get_pos()) - pos;     
    311           if(at_optr){
    312             optrArray[optr_idx].len = len;
    313             optr_idx++;
    314             at_optr = 0;
    315           }
    316           else{
    317             tokenArray[token_idx].len = len;
    318             token_idx++;
    319           }
    320           iter_end.scan_to_next();
    321           at_start = 1;
    322         }
    323       }
    324     }   
    325 
    326     block_pos += unitsums_per_reg[i]._16[7];
    327   }
    328   if(!at_start){
    329     base_pos = cb_size - pos;
    330 //     if(base_pos<0){
    331 //       cout << "in markup " << cb_size << "," << pos << "," << base_pos << endl;
    332 //     }
    333     memcpy(cb_new, &cb[pos], base_pos);
    334   }
    335   else
    336     base_pos = 0;
    337 }
    338 */
    339 static inline void Postprocessing(char* cb, int cb_size, char* cb_new, BitBlock * marker_starts, BitBlock * marker_ends, ubitblock * unitsums_per_reg, int & at_start, int & at_optr, int & base_pos){   
    340      
    341   int pos = 0;
    342   int len = 0;
    343  
    344   if(!at_start){
    345     if(at_optr){
    346       optrArray[optr_idx].char_ptr = cb;
    347     }
    348     else{
    349       if (tokenArray[token_idx].type == flt_token || tokenArray[token_idx].type == int_token){
    350         num_idx--;
    351         Digit_postprocessing(cb, 0);
    352       }
    353       else{
    354         tokenArray[token_idx].token.char_ptr = cb;
    355       }
    356     }
    357   }
    358    
    359   int block_pos = base_pos;
     278  int block_pos = buf_state.base_pos;
    360279  uint16_t * unit_sums = &(((uint16_t *) unitsums_per_reg)[7]);
    361280
     
    388307      tokenArray[token_idx].token.char_ptr = &cb[pos]; 
    389308    }
     309    else if(cb[pos] =='['){
     310      if(buf_state.array_depth==0)
     311        buf_state.array_start_tokenidx = token_idx;
     312      buf_state.array_depth++;     
     313      iter_start.scan_to_next();
     314      continue;
     315    }
     316    else if(cb[pos] ==']'){
     317      buf_state.array_depth--;
     318      if(buf_state.array_depth==0){
     319        tokenArray[token_idx].type = array_token;
     320        tokenArray[token_idx].token.idx = buf_state.array_start_tokenidx;
     321      }     
     322      token_idx++;
     323      iter_start.scan_to_next();
     324      continue;
     325    }
    390326    else{
    391327      //operator
    392328      optrArray[optr_idx].char_ptr = &cb[pos];
    393329      optrArray[optr_idx].token_pos = token_idx-1;
    394       at_optr = 1;
     330      buf_state.at_optr = 1;
    395331    }
    396332    iter_start.scan_to_next();
    397333    if(iter_end.is_done()){
    398       at_start = 0;
     334      buf_state.at_start = 0;
    399335      break;
    400336    }
    401337    len = block_pos + (iter_end.get_pos()) - pos;       
    402     if(at_optr){
     338    if(buf_state.at_optr){
    403339      optrArray[optr_idx].len = len;
    404340      optr_idx++;
    405       at_optr = 0;
     341      buf_state.at_optr = 0;
    406342    }
    407343    else{
     
    410346    }
    411347    iter_end.scan_to_next();
    412     at_start = 1;
     348    buf_state.at_start = 1;
    413349  }
    414350 
    415   if(!at_start){
    416     base_pos = cb_size - pos;
    417     memcpy(cb_new, &cb[pos], base_pos);
     351  if(!buf_state.at_start){
     352    buf_state.base_pos = cb_size - pos;
     353    memcpy(cb_new, &cb[pos], buf_state.base_pos);
    418354  }
    419355  else
    420     base_pos = 0;
     356    buf_state.base_pos = 0;
    421357}
    422358
     
    450386  int cb_idx = 0; 
    451387  ubitblock unitsums_per_reg[BLOCKS+1];
    452   int at_start = 1;
    453   int at_optr = 0;
    454   int base_pos = 0;
     388  struct buf_State buf_state;
     389  buf_state.at_start = 1;
     390  buf_state.at_optr = 0;
     391  buf_state.base_pos = 0;
     392  buf_state.array_depth = 0;
     393  buf_state.array_start_tokenidx = 0;
    455394
    456395  unitsums_per_reg[0]._128 = simd<16>::constant<0>();
     
    505444
    506445    content_buf[cb_idx+1] = (char*)malloc(BUF_SIZE*2);
    507     Postprocessing(content_buf[cb_idx], content_buf_ptr-content_buf[cb_idx], content_buf[cb_idx+1], marker_starts, marker_ends, unitsums_per_reg, at_start, at_optr, base_pos);
     446    Postprocessing(content_buf[cb_idx], content_buf_ptr-content_buf[cb_idx], content_buf[cb_idx+1], marker_starts, marker_ends, unitsums_per_reg, buf_state);
    508447//     ints_2_floats(numbers, dec_pl, num_rslt, num_idx);
    509448//      ints_x_floats(numbers, dec_pl, num_rslt, num_idx);
    510449//    simd_ints_2_floats(numbers, dec_pl, num_rslt, num_idx);
    511     content_buf_ptr =  content_buf[cb_idx+1]+base_pos;
     450    content_buf_ptr =  content_buf[cb_idx+1]+buf_state.base_pos;
    512451    cb_idx++; 
    513452   
     
    553492    marker_ends[BLOCKS-1] = marker.ends;
    554493    content_buf[cb_idx+1] = (char*)malloc(BUF_SIZE*2);
    555     Postprocessing(content_buf[cb_idx], content_buf_ptr-content_buf[cb_idx], content_buf[cb_idx+1], marker_starts, marker_ends, unitsums_per_reg, at_start, at_optr, base_pos); 
     494    Postprocessing(content_buf[cb_idx], content_buf_ptr-content_buf[cb_idx], content_buf[cb_idx+1], marker_starts, marker_ends, unitsums_per_reg, buf_state); 
    556495    break;
    557496  }
     
    564503    for(int i=0;i<num_idx;i++)
    565504      printf("%i,%i\n",numbers[i],dec_pl[i]);*/
    566 /*   
     505   
    567506    for(int i=0; i<100;i++){
    568507      if(tokenArray[i].type==str_token){
     
    595534        printf("\n");
    596535      }
     536      if(tokenArray[i].type==array_token){
     537        printf("Array: %i, %i\n", tokenArray[i].token.idx, i);
     538      }
    597539    }
    598540   
    599541
    600542    for(int i=0;i<100;i++)  {
    601         printf("len at %i is %i\n",i, optrArray[i].len);
     543//         printf("len at %i is %i\n",i, optrArray[i].len);
    602544        for(int j=0; j<optrArray[i].len; j++)
    603545          printf("%c",optrArray[i].char_ptr[j]);
    604546        printf("\n");
    605547    }
    606 */
     548
    607549}
    608550
Note: See TracChangeset for help on using the changeset viewer.