Changeset 129 for trunk/src/engine.c


Ignore:
Timestamp:
May 4, 2008, 5:17:12 AM (11 years ago)
Author:
cameron
Message:

UTF-8 validation.

File:
1 edited

Legend:

Unmodified
Added
Removed
  • trunk/src/engine.c

    r128 r129  
    172172        lexer = Lexer<C>::LexerFactory(e, buf);
    173173        bitplex->TransposeToBitStreams(byteplex->x8data, blocks_in_buffer);
    174         lexer->AnalyzeBuffer(bitplex->x8basis, buffer_base_pos, buffer_limit_pos);
     174        lexer->AnalyzeBuffer(bitplex->x8basis, buffer_base_pos, buffer_rel_pos, buffer_limit_pos);
    175175}
    176176
     
    188188template <CodeUnit_Base C>
    189189inline void ParsingEngine<C>::AdvanceBuffers(int preserve_pos){
     190
    190191        int advance_amt = min(preserve_pos, text_or_markup_start) - buffer_base_pos;
    191192        advance_amt &= -PACKSIZE; // maintain alignment
     
    198199        byteplex->PreparePseudoASCII_Stream();
    199200        bitplex->TransposeToBitStreams(byteplex->x8data, blocks_in_buffer);
    200         lexer->AnalyzeBuffer(bitplex->x8basis, buffer_base_pos, buffer_limit_pos);
     201        lexer->AnalyzeBuffer(bitplex->x8basis, buffer_base_pos, buffer_rel_pos, buffer_limit_pos);
    201202}
    202203
     
    250251                                      buffer_rel_pos);
    251252  while (buffer_rel_pos >= BUFFER_SIZE) {
     253        while (at_UTF8_suffix(cur())) buffer_rel_pos--;
    252254        FinalizeBuffer_action(preserve_pos);
    253255        AdvanceBuffers(preserve_pos);
     
    257259#endif
    258260
     261inline bool at_UTF8_suffix(unsigned char x8data[]) {
     262        unsigned char code_unit = x8data[0];
     263        return ((code_unit >= 0x80) & (code_unit < 0xC0));
     264}
     265
    259266template <CodeUnit_Base C>
    260267inline void ParsingEngine<C>::ScanToMarkupStart() {
     
    262269        text_or_markup_start = AbsPos();
    263270        buffer_rel_pos = bitstream_scan(buf->item_stream[MarkupStart], buffer_rel_pos);
    264         while (buffer_rel_pos >= BUFFER_SIZE) {
     271        while (buffer_rel_pos >= BUFFER_SIZE) {
     272                while (at_UTF8_suffix(cur())) buffer_rel_pos--;
    265273                Text_action(GetCodeUnitPtr(text_or_markup_start), LengthFrom(text_or_markup_start));
    266274                text_or_markup_start = AbsPos();
     
    276284        buffer_rel_pos = bitstream_scan(buf->item_stream[CD_End_check], buffer_rel_pos);
    277285        while (buffer_rel_pos >= BUFFER_SIZE) {
     286                while (at_UTF8_suffix(cur())) buffer_rel_pos--;
    278287                Text_action(GetCodeUnitPtr(text_or_markup_start), LengthFrom(text_or_markup_start));
    279288                text_or_markup_start = AbsPos();
     
    298307    while (buffer_rel_pos >= BUFFER_BLOCKS * BLOCKSIZE) {
    299308      buffer_rel_pos = BUFFER_BLOCKS * BLOCKSIZE;
     309      while (at_UTF8_suffix(cur())) buffer_rel_pos--;
    300310      FinalizeBuffer_action(preserve_pos);
    301311          AdvanceBuffers(preserve_pos);
Note: See TracChangeset for help on using the changeset viewer.