source: trunk/src/charsets/ext_ascii_16.c @ 35

Last change on this file since 35 was 15, checked in by cameron, 12 years ago

Bytespace scanning in XML declarations; various updates

File size: 3.8 KB
Line 
1/*  ext_ascii_16.c - Lexer object for 16-bit ASCII-based character sets.
2    Copyright (c) 2007, Robert D. Cameron.
3    Licensed to the public under the Open Software License 3.0.
4    Licensed to International Characters, Inc., under the Academic
5    Free License 3.0.
6
7*/
8
9#include "ext_ascii_16.h"
10
11Ext_ASCII_16_Lexer::Ext_ASCII_16_Lexer(XML_Buffer *b, ParallelStreamSet *p) :
12  Lexer(b, p) {
13}
14
15Ext_ASCII_16LE_Lexer::Ext_ASCII_16LE_Lexer(XML_Buffer *b, ParallelStreamSet *p) :
16  Ext_ASCII_16_Lexer(b, p) {
17}
18
19Ext_ASCII_16BE_Lexer::Ext_ASCII_16BE_Lexer(XML_Buffer *b, ParallelStreamSet *p) :
20  Ext_ASCII_16_Lexer(b, p) {
21}
22
23#define min(a,b) ((a) < (b) ? (a) : (b))
24
25const int X16_CODE_UNIT_BYTES = 2;  /* 8-bit code units in this file. */
26const int X16_BYTE_STREAM_SIZE = 
27  (BUFFER_BLOCKS * BLOCKSIZE + LOOKAHEAD_POSITIONS) * X16_CODE_UNIT_BYTES;
28
29int Ext_ASCII_16BE_Lexer::AdvanceBuffer(int newpos) {
30  int byte_pos = newpos * X16_CODE_UNIT_BYTES;
31  int avail_bytes = xml_buf->PrepareBytes(byte_pos, X16_BYTE_STREAM_SIZE);
32  unsigned char * buf_ptr = xml_buf->BytePtr(byte_pos);
33  int new_blocks = min(BUFFER_BLOCKS, 
34                       (avail_bytes/X16_CODE_UNIT_BYTES + BLOCKSIZE - 1)/BLOCKSIZE);
35  int new_packs = new_blocks * 8 + 1;
36#ifdef BUFFER_PROFILING
37  start_BOM_interval(bitstream_timer);
38#endif
39  BytePack * src_data = (BytePack *) buf_ptr;
40  for (int pk = 0; pk < new_packs; pk++) {
41    BytePack s0 = sisd_load_unaligned(&src_data[2*pk]);
42    BytePack s1 = sisd_load_unaligned(&src_data[2*pk+1]);
43#if (BYTE_ORDER == LITTLE_ENDIAN)
44    BytePack x16h = simd_pack_16_ll(s1, s0);
45    BytePack x16l = simd_pack_16_hh(s1, s0);
46#endif
47#if (BYTE_ORDER == BIG_ENDIAN)
48    BytePack x16h = simd_pack_16_hh(s0, s1);
49    BytePack x16l = simd_pack_16_ll(s0, s1);
50#endif
51    parsing_engine_data->x8data[pk] = 
52      simd_or(x16l, 
53              simd_andc(simd_const_8(0x80), 
54                        simd_eq_8(x16h, simd_const_8(0))));
55//  Why do the following cause a segfault?
56//    x16hi[pk] = x16h;
57//    x16lo[pk] = x16l;
58    sisd_store_unaligned(x16h, &x16hi[pk]);
59    sisd_store_unaligned(x16l, &x16lo[pk]);
60  }
61  ComputeLexicalItemStreams(new_blocks);
62  EstablishSentinels(avail_bytes/X16_CODE_UNIT_BYTES);
63  return avail_bytes/X16_CODE_UNIT_BYTES;
64}
65
66int Ext_ASCII_16LE_Lexer::AdvanceBuffer(int newpos) {
67  int byte_pos = newpos * X16_CODE_UNIT_BYTES;
68  int avail_bytes = xml_buf->PrepareBytes(byte_pos, X16_BYTE_STREAM_SIZE);
69  unsigned char * buf_ptr = xml_buf->BytePtr(byte_pos);
70  int new_blocks = min(BUFFER_BLOCKS, 
71                       (avail_bytes/X16_CODE_UNIT_BYTES + BLOCKSIZE - 1)/BLOCKSIZE);
72  int new_packs = new_blocks * 8 + 1;
73#ifdef BUFFER_PROFILING
74  start_BOM_interval(bitstream_timer);
75#endif
76  BytePack * src_data = (BytePack *) buf_ptr;
77  for (int pk = 0; pk < new_packs; pk++) {
78    BytePack s0 = sisd_load_unaligned(&src_data[2*pk]);
79    BytePack s1 = sisd_load_unaligned(&src_data[2*pk+1]);
80#if (BYTE_ORDER == LITTLE_ENDIAN)
81    BytePack x16l = simd_pack_16_ll(s1, s0);
82    BytePack x16h = simd_pack_16_hh(s1, s0);
83#endif
84#if (BYTE_ORDER == BIG_ENDIAN)
85    BytePack x16l = simd_pack_16_hh(s0, s1);
86    BytePack x16h = simd_pack_16_ll(s0, s1);
87#endif
88    parsing_engine_data->x8data[pk] = 
89      simd_or(x16l, 
90              simd_andc(simd_const_8(0x80), 
91                        simd_eq_8(x16h, simd_const_8(0))));
92//  Why do the following cause a segfault?
93//    x16hi[pk] = x16h;
94//    x16lo[pk] = x16l;
95    sisd_store_unaligned(x16h, &x16hi[pk]);
96    sisd_store_unaligned(x16l, &x16lo[pk]);
97  }
98  ComputeLexicalItemStreams(new_blocks);
99  EstablishSentinels(avail_bytes/X16_CODE_UNIT_BYTES);
100  return avail_bytes/X16_CODE_UNIT_BYTES;
101}
102
103
104int Ext_ASCII_16_Lexer::BOM_size(int rel_pos) {
105  if ((((unsigned char *) x16hi)[rel_pos] == 0xFE) &&
106      (((unsigned char *) x16lo)[rel_pos] == 0xFF)) return 1;
107  else return 0;
108}
109
110
Note: See TracBrowser for help on using the repository browser.