Changeset 177


Ignore:
Timestamp:
Jul 8, 2008, 1:49:42 PM (11 years ago)
Author:
ksherdy
Message:

Update CopyAndFill to include EOF sentinel.

File:
1 edited

Legend:

Unmodified
Added
Removed
  • trunk/src/byteplex.c

    r172 r177  
    135135        }
    136136}
    137                                          
     137
    138138void U16LE_Buffer::DoByteplex() {
    139139        DoDuplex(src_buffer, packs_in_buffer, x16lo, x16hi);
     
    201201void U16_Buffer::PreparePseudoASCII_Stream() {
    202202        for (int pk = 0; pk < packs_in_buffer; pk++) {
    203                 x8data[pk] = simd_or(x16lo[pk], 
    204                                      simd_andc(simd_const_8(0x80), 
     203                x8data[pk] = simd_or(x16lo[pk],
     204                                     simd_andc(simd_const_8(0x80),
    205205                                               simd_eq_8(x16hi[pk], simd_const_8(0))));
    206206        }
     
    210210        for (int pk = 0; pk < packs_in_buffer; pk++) {
    211211                BytePack hi = simd_or(simd_or(x32hh[pk], x32hl[pk]), x32lh[pk]);
    212                 x8data[pk] = simd_or(x32ll[pk], 
    213                                      simd_andc(simd_const_8(0x80), 
     212                x8data[pk] = simd_or(x32ll[pk],
     213                                     simd_andc(simd_const_8(0x80),
    214214                                               simd_eq_8(hi, simd_const_8(0))));
    215215        }
     
    222222        code_clocker->cc_start_interval();
    223223#endif
    224         int bytes_read = fread(&((unsigned char *)src_buffer)[lgth], 1, bytes_to_read, infile);
     224        unsigned char * end_ptr = &((unsigned char *)src_buffer)[lgth];
     225        int bytes_read = fread(end_ptr, 1, bytes_to_read, infile);
     226        if (bytes_read < bytes_to_read) end_ptr[bytes_read] = '\0'; /* sentinel */
    225227#if defined(PAPI) and defined(CODE_CLOCKING) and (CODE_CLOCKING == FILE_READING)
    226228        code_clocker->cc_end_interval(bytes_read);
     
    236238
    237239template <CodeUnit_Base C>
    238 void X8_Buffer<C>::InitializeBuffer(unsigned char * src, int lgth){     
    239         int byte_advance = BYTEPLEX_SIZE - lgth;       
     240void X8_Buffer<C>::InitializeBuffer(unsigned char * src, int lgth){
     241        int byte_advance = BYTEPLEX_SIZE - lgth;
    240242        int bytes_read = CopyAndFill(src, lgth, byte_advance);
    241243        Set_limits(bytes_read + lgth);
    242244}
    243245
    244 void U16_Buffer::InitializeBuffer(unsigned char * src, int lgth){       
    245         int byte_advance = BYTEPLEX_SIZE * 2 - lgth;   
     246void U16_Buffer::InitializeBuffer(unsigned char * src, int lgth){
     247        int byte_advance = BYTEPLEX_SIZE * 2 - lgth;
    246248        int bytes_read = CopyAndFill(src, lgth, byte_advance);
    247249        if (bytes_read % 2 != 0) {
     
    251253}
    252254
    253 void U32_Buffer::InitializeBuffer(unsigned char * src, int lgth){       
    254         int byte_advance = BYTEPLEX_SIZE * 4 - lgth;   
     255void U32_Buffer::InitializeBuffer(unsigned char * src, int lgth){
     256        int byte_advance = BYTEPLEX_SIZE * 4 - lgth;
    255257        int bytes_read = CopyAndFill(src, lgth, byte_advance);
    256258        if (bytes_read % 4 != 0) {
     
    263265
    264266template <CodeUnit_Base C>
    265 void X8_Buffer<C>::AdvanceInputBuffer(int advance_amt){ 
    266         int bytes_to_keep = units_in_buffer - advance_amt;     
     267void X8_Buffer<C>::AdvanceInputBuffer(int advance_amt){
     268        int bytes_to_keep = units_in_buffer - advance_amt;
    267269        int bytes_read = CopyAndFill(&((unsigned char *)src_buffer)[advance_amt],
    268270                                        bytes_to_keep, advance_amt);
     
    270272}
    271273
    272 void U16_Buffer::AdvanceInputBuffer(int advance_amt){   
    273         int bytes_to_keep = (units_in_buffer - advance_amt)*2; 
     274void U16_Buffer::AdvanceInputBuffer(int advance_amt){
     275        int bytes_to_keep = (units_in_buffer - advance_amt)*2;
    274276        int bytes_read = CopyAndFill(&((unsigned char *)src_buffer)[advance_amt*2],
    275277                                        bytes_to_keep, advance_amt*2);
     
    280282}
    281283
    282 void U32_Buffer::AdvanceInputBuffer(int advance_amt){   
    283         int bytes_to_keep = (units_in_buffer - advance_amt)*4; 
     284void U32_Buffer::AdvanceInputBuffer(int advance_amt){
     285        int bytes_to_keep = (units_in_buffer - advance_amt)*4;
    284286        int bytes_read = CopyAndFill(&((unsigned char *)src_buffer)[advance_amt*4],
    285287                                        bytes_to_keep, advance_amt*4);
     
    310312                surrogate_scope = simd_or(hi_surrogate_pending,
    311313                                          sisd_sfli(hi_surrogate, 8));
    312                                          
     314
    313315                u16_surrogate_error = simd_xor(surrogate_scope, lo_surrogate);
    314316                hi_surrogate_pending = sisd_sbli(hi_surrogate, 8 * (PACKSIZE-1));
     
    319321//              u16_FFFE_FFFF_accum = simd_or(u16_FFFE_FFFF_accum, u16_FFFE_FFFF);
    320322                u16_surrogate_error = simd_or(u16_surrogate_error, u16_FFFE_FFFF);
    321        
     323
    322324                if (bitblock_has_bit(u16_surrogate_error)) {
    323325                        CharSetValidationError("UTF-16 (relative position reported)",
     
    337339                /* The high byte of UCS-2 code units cannot be in the range D8-DF.
    338340                   This corresponds to the D800-DFFF range of illegal codepoints
    339                    reserved for UTF-16 surrogate pairs. Accumulate the results. 
     341                   reserved for UTF-16 surrogate pairs. Accumulate the results.
    340342                   To check, 0x20 is added to each such octet, mapping the D8-DF
    341343                   range to F8-FF and wrapping E0-FF values around.  The max value
    342344                   is then accumulated.  */
    343345                u16_surrogate_accum =
    344                         simd_max_8(u16_surrogate_accum, 
     346                        simd_max_8(u16_surrogate_accum,
    345347                                   simd_add_8(x16hi[pk], simd_const_8(0x20)));
    346348                /* The values FFFE and FFFF are excluded. */
     
    377379                /* The second octet has a max value of 0x10, corresponding to the
    378380                   maximum Unicode code point value of 0x10FFFF.  Accumulate the
    379                    maximum of all u32hl values observed. */ 
     381                   maximum of all u32hl values observed. */
    380382                u32hl_accum = simd_max_8(u32hl_accum, x32hl[pk]);
    381383                /* The third octet cannot be in the range D8-DF if the second octet
    382384                   is 0.  This corresponds to the D800-DFFF range of illegal codepoints
    383                    reserved for UTF-16 surrogate pairs. Accumulate the results. 
     385                   reserved for UTF-16 surrogate pairs. Accumulate the results.
    384386                   To check, 0x20 is added to each such octet, mapping the D8-DF
    385387                   range to F8-FF and wrapping E0-FF values around.  The max value
    386388                   is then accumulated.  */
    387389                u32_BMP_select = simd_eq_8(x32hl[pk], simd_const_8(0));
    388                 u32_surrogate_accum = 
    389                         simd_max_8(u32_surrogate_accum, 
     390                u32_surrogate_accum =
     391                        simd_max_8(u32_surrogate_accum,
    390392                                   simd_and(u32_BMP_select,
    391393                                            simd_add_8(x32lh[pk], simd_const_8(0x20))));
     
    414416        Byteplex * b;
    415417        if (likely(e->code_unit_size == SingleByte)) {
    416                 if (likely(e->code_unit_base == ASCII)) 
     418                if (likely(e->code_unit_base == ASCII))
    417419                        b = new X8_Buffer<ASCII>();
    418420                else b = new X8_Buffer<EBCDIC>();
     
    428430                case Unusual_2143: b = new U32_2143_Buffer(); break;
    429431                case Unusual_3412: b = new U32_3412_Buffer(); break;
    430         }       
     432        }
    431433        return b;
    432434}
     
    437439        return b;
    438440}
    439        
     441
    440442Byteplex * Byteplex::ByteplexFactory(Entity_Info * e, unsigned char * buffer_bytes, int buffer_size) {
    441443        Byteplex * b = ByteplexFactory(e);
     
    509511template <>
    510512void X8_Buffer<EBCDIC>::to_UTF8(int name_pos, int lgth, char * u8_ptr){
    511        
     513
    512514}
    513515void U16_Buffer::to_UTF8(int name_pos, int lgth, char * u8_ptr){
     
    560562                        u8_ptr[u8_lgth+1] = 0x80 + ((u32hl[i] & 0x03) << 4) + (u32lh[i] >> 4);
    561563                        u8_ptr[u8_lgth+2] = 0x80 + ((u32lh[i] & 0x0F) << 2) + (u32ll[i] >> 6);
    562                         u8_ptr[u8_lgth+3] = 0x80 + (u32ll[i] & 0x3F);           
     564                        u8_ptr[u8_lgth+3] = 0x80 + (u32ll[i] & 0x3F);
    563565                        u8_lgth += 4;
    564566                }
Note: See TracChangeset for help on using the changeset viewer.