source: trunk/src/charsets/ext_ascii_8.c @ 6

Last change on this file since 6 was 4, checked in by cameron, 12 years ago

Initial import of parabix-0.36

File size: 1.4 KB
Line 
1/*  ext_ascii_8.c - Lexer object for 8-bit ASCII-based character sets.
2    Copyright (c) 2007, Robert D. Cameron.
3    Licensed to the public under the Open Software License 3.0.
4    Licensed to International Characters, Inc., under the Academic
5    Free License 3.0.
6
7*/
8#include "ext_ascii_8.h"
9
10Ext_ASCII_8_Lexer::Ext_ASCII_8_Lexer(XML_Buffer *b, ParallelStreamSet *p) : Lexer(b, p) {
11}
12
13
14#define min(a,b) ((a) < (b) ? (a) : (b))
15
16const int CODE_UNIT_BYTES = 1;  /* 8-bit code units in this file. */
17const int BYTE_STREAM_SIZE = (BUFFER_BLOCKS * BLOCKSIZE + LOOKAHEAD_POSITIONS) * CODE_UNIT_BYTES;
18
19int Ext_ASCII_8_Lexer::AdvanceBuffer(int newpos) {
20  unsigned char * buf_ptr = xml_buf->GetBytePtr(newpos * CODE_UNIT_BYTES);
21  int avail_bytes = xml_buf->PrepareBytes(BYTE_STREAM_SIZE);
22  int new_blocks = min(BUFFER_BLOCKS, (avail_bytes + BLOCKSIZE - 1)/BLOCKSIZE);
23  int new_packs = new_blocks * 8 + 1;
24#ifdef BUFFER_PROFILING
25  start_BOM_interval(bitstream_timer);
26#endif
27  BytePack * src_data = (BytePack *) buf_ptr;
28  for (int pk = 0; pk < new_packs; pk++) {
29    parsing_engine_data->x8data[pk] = sisd_load_unaligned(&src_data[pk]);
30  }
31  ComputeLexicalItemStreams(new_blocks);
32  return avail_bytes/CODE_UNIT_BYTES;
33}
34
35int Ext_ASCII_8_Lexer::BOM_size(int rel_pos) {
36  if (s3int32((unsigned char *) &parsing_engine_data->x8data) == c3int32(0xEF, 0xBB, 0xBF))
37    return 3;
38  else return 0;
39}
Note: See TracBrowser for help on using the repository browser.