Changeset 170


Ignore:
Timestamp:
Jun 25, 2008, 10:55:28 AM (11 years ago)
Author:
cameron
Message:

AdjustBufferEndForIncompleteSequences for UTF-8 and UTF-16

Location:
trunk/src
Files:
2 edited

Legend:

Unmodified
Added
Removed
  • trunk/src/engine.c

    r169 r170  
    345345}
    346346
     347template <class B>
     348inline void ParsingEngine<B>::AdjustBufferEndForIncompleteSequences() {
     349}
     350
     351template <>
     352inline void ParsingEngine<UTF8_Buffer>::AdjustBufferEndForIncompleteSequences() {
     353        if (*(cur()-1) >= 0xC0) buffer_rel_pos--;
     354        else if (*(cur()-2) >= 0xE0) buffer_rel_pos -= 2;
     355        else if (*(cur()-3) >= 0xF0) buffer_rel_pos -= 3;
     356}
     357
     358template <>
     359inline void ParsingEngine<U16_Buffer>::AdjustBufferEndForIncompleteSequences() {
     360        unsigned short last_u16_unit = *(GetCodeUnitPtr(AbsPos()-1));
     361        if ((last_u16_unit >= 0xD800) & (last_u16_unit <= 0xDC00)) buffer_rel_pos--;
     362}
     363
    347364
    348365#ifndef OPTIMIZE_SHORT_SCAN
     
    352369                                      buffer_rel_pos);
    353370  while (buffer_rel_pos >= BUFFER_SIZE) {
    354         while (at_UTF8_suffix(cur())) buffer_rel_pos--;
     371        AdjustBufferEndForIncompleteSequences();
    355372        FinalizeBuffer_action();
    356373        AdvanceBuffers();
     
    360377#endif
    361378
    362 inline bool at_UTF8_suffix(unsigned char x8data[]) {
    363         unsigned char code_unit = x8data[0];
    364         return ((code_unit >= 0x80) & (code_unit < 0xC0));
    365 }
    366 
    367379template <class B>
    368380inline void ParsingEngine<B>::ScanToMarkupStart() {
     
    370382        buffer_rel_pos = bitstream_scan(buf->item_stream[MarkupStart], buffer_rel_pos);
    371383        while (buffer_rel_pos >= BUFFER_SIZE) {
    372                 while (at_UTF8_suffix(cur())) buffer_rel_pos--;
     384                AdjustBufferEndForIncompleteSequences();
    373385                Text_action(GetCodeUnitPtr(text_or_markup_start), LengthFrom(text_or_markup_start));
    374386                text_or_markup_start = AbsPos();
     
    383395        buffer_rel_pos = bitstream_scan(buf->item_stream[CD_End_check], buffer_rel_pos);
    384396        while (buffer_rel_pos >= BUFFER_SIZE) {
    385                 while (at_UTF8_suffix(cur())) buffer_rel_pos--;
     397                AdjustBufferEndForIncompleteSequences();
    386398                Text_action(GetCodeUnitPtr(text_or_markup_start), LengthFrom(text_or_markup_start));
    387399                text_or_markup_start = AbsPos();
     
    405417    buffer_rel_pos += bitstream_scan0((SIMD_type *) &bitstream_ptr[1]);
    406418    while (buffer_rel_pos >= BUFFER_BLOCKS * BLOCKSIZE) {
    407       buffer_rel_pos = BUFFER_BLOCKS * BLOCKSIZE;
    408       while (at_UTF8_suffix(cur())) buffer_rel_pos--;
     419        buffer_rel_pos = BUFFER_BLOCKS * BLOCKSIZE;
     420        AdjustBufferEndForIncompleteSequences();
    409421        FinalizeBuffer_action();
    410422        AdvanceBuffers();
  • trunk/src/engine.h

    r160 r170  
    8787        void ScanToMarkupStart();  // Specialized version.
    8888        void ScanToCD_End_check(); // Specialized version.
    89 
     89        void AdjustBufferEndForIncompleteSequences();
    9090        void AdvanceBuffers();
    9191        /* Parsing routines. */
Note: See TracChangeset for help on using the changeset viewer.