source: trunk/src/charsets/ext_ascii_16.h @ 4

Last change on this file since 4 was 4, checked in by cameron, 12 years ago

Initial import of parabix-0.36

File size: 1.4 KB
Line 
1/*  ext_ascii_16.h - Lexer object for 16-bit ASCII-based character sets.
2    Copyright (c) 2007, Robert D. Cameron.
3    Licensed to the public under the Open Software License 3.0.
4    Licensed to International Characters, Inc., under the Academic
5    Free License 3.0.
6
7*/
8#ifndef EXT_ASCII_16_H
9#define EXT_ASCII_16_H
10#include "../bitlex.h"
11
12class Ext_ASCII_16_Lexer : public Lexer {
13public:
14   Ext_ASCII_16_Lexer(XML_Buffer *b, ParallelStreamSet *p);
15   virtual int AdvanceBuffer(int new_code_unit_position) = 0;
16   int BOM_size(int rel_pos);
17protected:
18   BytePack x16hi[BUFFER_PACKS+LOOKAHEAD_PACKS];
19   BytePack x16lo[BUFFER_PACKS+LOOKAHEAD_PACKS];
20};
21
22class Ext_ASCII_16LE_Lexer : public Ext_ASCII_16_Lexer {
23public:
24   Ext_ASCII_16LE_Lexer(XML_Buffer *b, ParallelStreamSet *p);
25   int AdvanceBuffer(int new_code_unit_position);
26};
27
28class Ext_ASCII_16BE_Lexer : public Ext_ASCII_16_Lexer {
29public:
30   Ext_ASCII_16BE_Lexer(XML_Buffer *b, ParallelStreamSet *p);
31   int AdvanceBuffer(int new_code_unit_position);
32};
33
34// Identify all bytes in the range 0xD8 through 0xDF.
35inline BytePack mark_surrogates(BytePack u16hi, BytePack u16lo) {
36  return simd_eq_8(simd_andc(u16hi, simd_const_8(0x07)),
37                   simd_const_8(0xD8));
38}
39
40// Identify all pairs of bytes either FFFE or FFFF.
41inline BytePack mark_FFFE_FFFF(BytePack u16hi, BytePack u16lo) {
42  return simd_eq_8(simd_and(u16hi, simd_or(u16lo, simd_const_8(1))),
43                   simd_const_8(0xFF));
44}
45
46
47
48#endif
Note: See TracBrowser for help on using the repository browser.