source: trunk/src/multiliteral.h @ 17

Last change on this file since 17 was 17, checked in by cameron, 11 years ago

Initiating ASCII/EBCDIC selection using templates

File size: 4.5 KB
Line 
1/*  multiliteral.h - XML Multicharacter Recognizers.
2    Copyright (c) 2007, Robert D. Cameron. 
3    Licensed to the public under the Open Software License 3.0.
4    Licensed to International Characters, Inc., under the Academic
5    Free License 3.0.
6
7This file provides a library of routines for the efficient recognition
8of particular XML multicharacter sequences.  Sequences of length 2 are
9compared as 16 bit integers, sequences of length 3 or 4 are compared
10as 32 bit integers and other sequences of length up to 8 are compared as
1164 bit integers.
12
13All functions are declared inline; there is no corresponding multiliteral.c
14file required.   */
15
16#ifndef MULTILITERAL_H
17#define MULTILITERAL_H
18
#include <assert.h>
#include <stdint.h>
#include <string.h>
#include "charsets/ASCII_EBCDIC.h"
22
/*
Helper templates.  Given 2, 4 or 8 characters comprising a sequence,
the c2int16, c4int32, and c8int64 templates determine the corresponding
16, 32 or 64 bit integer value, exposed as the static member `value`.
The characters are supplied as constant template arguments, so the
value is computed at compile time using template metaprogramming.
*/
30
31template <CharBase C, unsigned char c1, unsigned char c2>
32struct c2int16 {
33  static uint16_t const value =
34#if BYTE_ORDER == BIG_ENDIAN
35    (((uint16_t) Ord<C,c1>::value) << 8) + (uint16_t) Ord<C,c2>::value;
36#endif
37#if BYTE_ORDER == LITTLE_ENDIAN
38    (uint16_t) Ord<C,c1>::value + (((uint16_t) Ord<C,c2>::value) << 8);
39#endif
40};
41
42template <CharBase C, unsigned char c1, unsigned char c2,
43                      unsigned char c3, unsigned char c4>
44struct c4int32 {
45  static uint32_t const value =
46#if BYTE_ORDER == BIG_ENDIAN
47    (((uint16_t) c2int16<C,c1,c2>::value) << 16) + (uint16_t) c2int16<C,c3,c4>::value;
48#endif
49#if BYTE_ORDER == LITTLE_ENDIAN
50    (uint16_t) c2int16<C,c1,c2>::value + (((uint16_t) c2int16<C,c3,c4>::value) << 16);
51#endif
52};
53
54template <CharBase C, unsigned char c1, unsigned char c2,
55                      unsigned char c3, unsigned char c4,
56                      unsigned char c5, unsigned char c6,
57                      unsigned char c7, unsigned char c8>
58struct c8int64 {
59  static uint64_t const value =
60#if BYTE_ORDER == BIG_ENDIAN
61    ((uint64_t) c4int32<C, c1, c2, c3, c4>::value) << 32 + 
62     (uint64_t) c4int32<C, c5, c6, c7, c8>::value;
63#endif
64#if BYTE_ORDER == LITTLE_ENDIAN
65    ((uint64_t) c4int32<C, c1, c2, c3, c4>::value) + 
66    ((uint64_t) c4int32<C, c5, c6, c7, c8>::value) << 32;
67#endif
68};
69
70
71/*  Specialized helpers for 3, 5, 6, and 7 character combinations. */
72
73template <CharBase C, unsigned char c1, unsigned char c2,
74                      unsigned char c3>
75struct c3int32 {
76  static uint32_t const value = c4int32<C, c1, c2, c3, 0>::value;
77};
78
79template <CharBase C, unsigned char c1, unsigned char c2,
80                      unsigned char c3, unsigned char c4,
81                      unsigned char c5>
82struct c5int64 {
83  static uint64_t const value = c8int64<C, c1, c2, c3, c4, c5, 0, 0, 0>::value;
84};
85
86template <CharBase C, unsigned char c1, unsigned char c2,
87                      unsigned char c3, unsigned char c4,
88                      unsigned char c5, unsigned char c6>
89struct c6int64 {
90  static uint64_t const value = c8int64<C, c1, c2, c3, c4, c5, c6, 0, 0>::value;
91};
92
93template <CharBase C, unsigned char c1, unsigned char c2,
94                      unsigned char c3, unsigned char c4,
95                      unsigned char c5, unsigned char c6,
96                      unsigned char c7>
97struct c7int64 {
98  static uint64_t const value = c8int64<C, c1, c2, c3, c4, c5, c6, c7, 0>::value;
99};
100
101
102/*
103A second set of helper functions determines 16, 32, or 64 bit integer
104values from character arrays.
105Precondition:  the character array is allocated with at least the
106number of required characters in each case. */
/* Loads the first two bytes of s as a uint16_t in native (memory) byte order.
   Precondition: s points to at least 2 readable bytes.
   The bytes are copied with memcpy rather than read through a casted
   uint16_t pointer, so the load is well defined for any alignment of s and
   does not violate the aliasing rules. */
static inline uint16_t s2int16(unsigned char s[]) {
  uint16_t packed;
  memcpy(&packed, s, sizeof packed);
  return packed;
}
110
/* Loads the first four bytes of s as a uint32_t in native (memory) byte order.
   Precondition: s points to at least 4 readable bytes.
   The bytes are copied with memcpy rather than read through a casted
   uint32_t pointer, so the load is well defined for any alignment of s and
   does not violate the aliasing rules. */
static inline uint32_t s4int32(unsigned char s[]) {
  uint32_t packed;
  memcpy(&packed, s, sizeof packed);
  return packed;
}
114
/* Loads the first eight bytes of s as a uint64_t in native (memory) byte order.
   Precondition: s points to at least 8 readable bytes.
   The bytes are copied with memcpy rather than read through a casted
   uint64_t pointer, so the load is well defined for any alignment of s and
   does not violate the aliasing rules. */
static inline uint64_t s8int64(unsigned char s[]) {
  uint64_t packed;
  memcpy(&packed, s, sizeof packed);
  return packed;
}
118
119static inline uint32_t s3int32(unsigned char s[]) {
120  return s4int32(s) & c3int32<Native, 0xFF, 0xFF, 0xFF>::value;
121}
122
123static inline uint64_t s5int64(unsigned char s[]) {
124  return s8int64(s) & c5int64<Native, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF>::value;
125}
126
127static inline uint64_t s6int64(unsigned char s[]) {
128  return s8int64(s) & c6int64<Native, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF>::value;
129}
130
131static inline uint64_t s7int64(unsigned char s[]) {
132  return s8int64(s) & c7int64<Native, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF>::value;
133}
134
135#endif
Note: See TracBrowser for help on using the repository browser.