Changeset 6 for trunk/src


Ignore:
Timestamp: Dec 21, 2007, 8:55:58 AM (11 years ago)
Author: cameron
Message:

Ext_ASCII_16LE/charset_family updates.

Location: trunk/src
Files: 4 edited

Legend:

Unmodified
Added
Removed
  • trunk/src/charsets/charset_family.h

    r4 r6  
    11/*  charset_family.h
    2     copyright (c) 2007, Robert D. Cameron
    32    Copyright (c) 2007, Robert D. Cameron.
    43    Licensed to the public under the Open Software License 3.0.
     
    1413
    1514    The Ext_ASCII_8 family is very extensive, including UTF-8 and
    16     the various ISO Latin character sets.
     15    the various ISO Latin character sets, among others.
    1716
    1817    Detection based on the first 4 bytes of an XML entity is
  • trunk/src/charsets/ext_ascii_16.c

    r4 r6  
    3838  BytePack * src_data = (BytePack *) buf_ptr;
    3939  for (int pk = 0; pk < new_packs; pk++) {
    40 printf("pk = %i\n", pk);
    4140    BytePack s0 = sisd_load_unaligned(&src_data[2*pk]);
    4241    BytePack s1 = sisd_load_unaligned(&src_data[2*pk+1]);
    43 print_bit_block("s0", s0);
    44 print_bit_block("s1", s1);
    4542#if (BYTE_ORDER == LITTLE_ENDIAN)
    4643    BytePack x16h = simd_pack_16_ll(s1, s0);
     
    5148    BytePack x16l = simd_pack_16_ll(s0, s1);
    5249#endif
    53 print_bit_block("x16h", x16h);
    54 print_bit_block("x16l", x16l);
    5550    parsing_engine_data->x8data[pk] =
    5651      simd_or(x16l,
    5752              simd_andc(simd_const_8(0x80),
    5853                        simd_eq_8(x16h, simd_const_8(0))));
    59 //    sisd_store_unaligned(x16h, &x16hi[pk]);
    60 //    sisd_store_unaligned(x16l, &x16lo[pk]);
     54//  Why do the following cause a segfault?
     55//    x16hi[pk] = x16h;
     56//    x16lo[pk] = x16l;
     57    sisd_store_unaligned(x16h, &x16hi[pk]);
     58    sisd_store_unaligned(x16l, &x16lo[pk]);
    6159  }
    6260  ComputeLexicalItemStreams(new_blocks);
     
    8987              simd_andc(simd_const_8(0x80),
    9088                        simd_eq_8(x16h, simd_const_8(0))));
    91 //    sisd_store_unaligned(x16h, &x16hi[pk]);
    92 //    sisd_store_unaligned(x16l, &x16lo[pk]);
     89//  Why do the following cause a segfault?
     90//    x16hi[pk] = x16h;
     91//    x16lo[pk] = x16l;
     92    sisd_store_unaligned(x16h, &x16hi[pk]);
     93    sisd_store_unaligned(x16l, &x16lo[pk]);
    9394  }
    9495  ComputeLexicalItemStreams(new_blocks);
  • trunk/src/charsets/ext_ascii_16.h

    r4 r6  
    3232};
    3333
    34 // Identify all bytes in the range 0xD8 through 0xDF.
     34// Identify all positions in the range 0xD800 through 0xDFFF.
    3535inline BytePack mark_surrogates(BytePack u16hi, BytePack u16lo) {
    3636  return simd_eq_8(simd_andc(u16hi, simd_const_8(0x07)),
     
    3838}
    3939
    40 // Identify all pairs of bytes either FFFE or FFFF.
     40// Identify all positions that have either FFFE or FFFF.
    4141inline BytePack mark_FFFE_FFFF(BytePack u16hi, BytePack u16lo) {
    4242  return simd_eq_8(simd_and(u16hi, simd_or(u16lo, simd_const_8(1))),
  • trunk/src/engine.c

    r4 r6  
    6666      printf("Ext_ASCII_16BE document detected.\n");
    6767      lex = new Ext_ASCII_16BE_Lexer::Ext_ASCII_16BE_Lexer(xml_buf, &buf);
     68      break;
     69    case Ext_ASCII_16LE:
     70      printf("Ext_ASCII_16LE document detected.\n");
     71      lex = new Ext_ASCII_16LE_Lexer::Ext_ASCII_16LE_Lexer(xml_buf, &buf);
    6872      break;
    6973    default:
Note: See TracChangeset for help on using the changeset viewer.