source: trunk/src/charsets/ext_ascii_16.c @ 8

Last change on this file since 8 was 8, checked in by cameron, 12 years ago

Sentinels

File size: 3.7 KB
Line 
1/*  ext_ascii_16.c - Lexer object for 16-bit ASCII-based character sets.
2    Copyright (c) 2007, Robert D. Cameron.
3    Licensed to the public under the Open Software License 3.0.
4    Licensed to International Characters, Inc., under the Academic
5    Free License 3.0.
6
7*/
8
9#include "ext_ascii_16.h"
10
11Ext_ASCII_16_Lexer::Ext_ASCII_16_Lexer(XML_Buffer *b, ParallelStreamSet *p) :
12  Lexer(b, p) {
13}
14
15Ext_ASCII_16LE_Lexer::Ext_ASCII_16LE_Lexer(XML_Buffer *b, ParallelStreamSet *p) :
16  Ext_ASCII_16_Lexer(b, p) {
17}
18
19Ext_ASCII_16BE_Lexer::Ext_ASCII_16BE_Lexer(XML_Buffer *b, ParallelStreamSet *p) :
20  Ext_ASCII_16_Lexer(b, p) {
21}
22
23#define min(a,b) ((a) < (b) ? (a) : (b))
24
25const int X16_CODE_UNIT_BYTES = 2;  /* 8-bit code units in this file. */
26const int X16_BYTE_STREAM_SIZE = 
27  (BUFFER_BLOCKS * BLOCKSIZE + LOOKAHEAD_POSITIONS) * X16_CODE_UNIT_BYTES;
28
29int Ext_ASCII_16BE_Lexer::AdvanceBuffer(int newpos) {
30  unsigned char * buf_ptr = xml_buf->GetBytePtr(newpos * X16_CODE_UNIT_BYTES);
31  int avail_bytes = xml_buf->PrepareBytes(X16_BYTE_STREAM_SIZE);
32  int new_blocks = min(BUFFER_BLOCKS, 
33                       (avail_bytes/X16_CODE_UNIT_BYTES + BLOCKSIZE - 1)/BLOCKSIZE);
34  int new_packs = new_blocks * 8 + 1;
35#ifdef BUFFER_PROFILING
36  start_BOM_interval(bitstream_timer);
37#endif
38  BytePack * src_data = (BytePack *) buf_ptr;
39  for (int pk = 0; pk < new_packs; pk++) {
40    BytePack s0 = sisd_load_unaligned(&src_data[2*pk]);
41    BytePack s1 = sisd_load_unaligned(&src_data[2*pk+1]);
42#if (BYTE_ORDER == LITTLE_ENDIAN)
43    BytePack x16h = simd_pack_16_ll(s1, s0);
44    BytePack x16l = simd_pack_16_hh(s1, s0);
45#endif
46#if (BYTE_ORDER == BIG_ENDIAN)
47    BytePack x16h = simd_pack_16_hh(s0, s1);
48    BytePack x16l = simd_pack_16_ll(s0, s1);
49#endif
50    parsing_engine_data->x8data[pk] = 
51      simd_or(x16l, 
52              simd_andc(simd_const_8(0x80), 
53                        simd_eq_8(x16h, simd_const_8(0))));
54//  Why do the following cause a segfault?
55//    x16hi[pk] = x16h;
56//    x16lo[pk] = x16l;
57    sisd_store_unaligned(x16h, &x16hi[pk]);
58    sisd_store_unaligned(x16l, &x16lo[pk]);
59  }
60  ComputeLexicalItemStreams(new_blocks);
61  EstablishSentinels(avail_bytes/X16_CODE_UNIT_BYTES);
62  return avail_bytes/X16_CODE_UNIT_BYTES;
63}
64
65int Ext_ASCII_16LE_Lexer::AdvanceBuffer(int newpos) {
66  unsigned char * buf_ptr = xml_buf->GetBytePtr(newpos * X16_CODE_UNIT_BYTES);
67  int avail_bytes = xml_buf->PrepareBytes(X16_BYTE_STREAM_SIZE);
68  int new_blocks = min(BUFFER_BLOCKS, 
69                       (avail_bytes/X16_CODE_UNIT_BYTES + BLOCKSIZE - 1)/BLOCKSIZE);
70  int new_packs = new_blocks * 8 + 1;
71#ifdef BUFFER_PROFILING
72  start_BOM_interval(bitstream_timer);
73#endif
74  BytePack * src_data = (BytePack *) buf_ptr;
75  for (int pk = 0; pk < new_packs; pk++) {
76    BytePack s0 = sisd_load_unaligned(&src_data[2*pk]);
77    BytePack s1 = sisd_load_unaligned(&src_data[2*pk+1]);
78#if (BYTE_ORDER == LITTLE_ENDIAN)
79    BytePack x16l = simd_pack_16_ll(s1, s0);
80    BytePack x16h = simd_pack_16_hh(s1, s0);
81#endif
82#if (BYTE_ORDER == BIG_ENDIAN)
83    BytePack x16l = simd_pack_16_hh(s0, s1);
84    BytePack x16h = simd_pack_16_ll(s0, s1);
85#endif
86    parsing_engine_data->x8data[pk] = 
87      simd_or(x16l, 
88              simd_andc(simd_const_8(0x80), 
89                        simd_eq_8(x16h, simd_const_8(0))));
90//  Why do the following cause a segfault?
91//    x16hi[pk] = x16h;
92//    x16lo[pk] = x16l;
93    sisd_store_unaligned(x16h, &x16hi[pk]);
94    sisd_store_unaligned(x16l, &x16lo[pk]);
95  }
96  ComputeLexicalItemStreams(new_blocks);
97  EstablishSentinels(avail_bytes/X16_CODE_UNIT_BYTES);
98  return avail_bytes/X16_CODE_UNIT_BYTES;
99}
100
101
102int Ext_ASCII_16_Lexer::BOM_size(int rel_pos) {
103  if ((((unsigned char *) x16hi)[rel_pos] == 0xFE) &&
104      (((unsigned char *) x16lo)[rel_pos] == 0xFF)) return 1;
105  else return 0;
106}
107
108
Note: See TracBrowser for help on using the repository browser.