source: trunk/src/multiliteral.h @ 20

Last change on this file since 20 was 20, checked in by cameron, 11 years ago

simplified BYTE_ORDER dependencies

File size: 4.6 KB
Line 
/*  multiliteral.h - XML Multicharacter Recognizers.
    Copyright (c) 2007, 2008, Robert D. Cameron.
    Licensed to the public under the Open Software License 3.0.
    Licensed to International Characters, Inc., under the Academic
    Free License 3.0.

This file provides a library of routines for the efficient recognition
of particular XML multicharacter sequences.  Sequences of length 2 are
compared as 16-bit integers, sequences of length 3 or 4 are compared
as 32-bit integers, and other sequences of length up to 8 are compared as
64-bit integers.  The integer value for each XML multicharacter sequence
is determined as a compile-time constant for optimal efficiency.

All functions are declared inline; no corresponding multiliteral.c
file is required.   */
16
17#ifndef MULTILITERAL_H
18#define MULTILITERAL_H
19
#include <assert.h>
#include <stdint.h>
#include <string.h>
#include "charsets/ASCII_EBCDIC.h"
23
/*
Byte-order configuration.  LOW_BYTE_SHIFT is the bit shift applied to the
first character of a two-character sequence and HIGH_BYTE_SHIFT the shift
applied to the second, so that the assembled integer equals the value
obtained by loading the same two bytes from memory on this machine.

The BYTE_ORDER / BIG_ENDIAN / LITTLE_ENDIAN macros are used when they have
been defined; otherwise the compiler-predefined __BYTE_ORDER__ family
(GCC, Clang) is consulted.  Every comparison is guarded with defined():
an undefined macro evaluates to 0 inside #if, so an unguarded
"BYTE_ORDER == BIG_ENDIAN" is true for both byte orders whenever the
macros are missing.  An unknown or unsupported byte order stops
compilation with a diagnostic.
*/
#if defined(BYTE_ORDER) && defined(BIG_ENDIAN) && (BYTE_ORDER == BIG_ENDIAN)
const int LOW_BYTE_SHIFT = 8;
const int HIGH_BYTE_SHIFT = 0;
#elif defined(BYTE_ORDER) && defined(LITTLE_ENDIAN) && (BYTE_ORDER == LITTLE_ENDIAN)
const int LOW_BYTE_SHIFT = 0;
const int HIGH_BYTE_SHIFT = 8;
#elif defined(__BYTE_ORDER__) && defined(__ORDER_BIG_ENDIAN__) && \
      (__BYTE_ORDER__ == __ORDER_BIG_ENDIAN__)
const int LOW_BYTE_SHIFT = 8;
const int HIGH_BYTE_SHIFT = 0;
#elif defined(__BYTE_ORDER__) && defined(__ORDER_LITTLE_ENDIAN__) && \
      (__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__)
const int LOW_BYTE_SHIFT = 0;
const int HIGH_BYTE_SHIFT = 8;
#else
#error "multiliteral.h: cannot determine a supported byte order (big or little endian)"
#endif
32
/*
Helper metafunctions.  Given 2, 4 or 8 characters comprising a sequence,
the c2int16, c4int32, and c8int64 metafunctions determine the corresponding
16, 32 or 64 bit integer value.   These are template metafunctions that
must be instantiated with constant arguments to be applied at compile time.
The metafunctions may be instantiated for ASCII or EBCDIC based byte
sequences.
For example, c2int16<ASCII, '<', '/'>::value produces the compile-time
constant for the 16-bit value of an ASCII-based byte sequence
of the XML end tag opening delimiter.
*/
44
45template <CharBase C, unsigned char c1, unsigned char c2>
46struct c2int16 {
47  static uint16_t const value =
48    (((uint16_t) Ord<C,c1>::value) << LOW_BYTE_SHIFT) +
49    (((uint16_t) Ord<C,c2>::value) << HIGH_BYTE_SHIFT);
50};
51
52template <CharBase C, unsigned char c1, unsigned char c2,
53                      unsigned char c3, unsigned char c4>
54struct c4int32 {
55  static uint32_t const value =
56    (((uint32_t) c2int16<C,c1,c2>::value) << (2 * LOW_BYTE_SHIFT)) + 
57    (((uint32_t) c2int16<C,c3,c4>::value) << (2 * HIGH_BYTE_SHIFT));
58};
59
60template <CharBase C, unsigned char c1, unsigned char c2,
61                      unsigned char c3, unsigned char c4,
62                      unsigned char c5, unsigned char c6,
63                      unsigned char c7, unsigned char c8>
64struct c8int64 {
65  static uint64_t const value =
66    (((uint64_t) c4int32<C, c1, c2, c3, c4>::value) << (4 * LOW_BYTE_SHIFT)) + 
67    (((uint64_t) c4int32<C, c5, c6, c7, c8>::value) << (4 * HIGH_BYTE_SHIFT));
68};
69
70
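/*  Specialized helpers for 3, 5, 6, and 7 character combinations.
    Each one passes 0 for the unused trailing character positions of the
    corresponding 4 or 8 character metafunction. */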
72
73template <CharBase C, unsigned char c1, unsigned char c2,
74                      unsigned char c3>
75struct c3int32 {
76  static uint32_t const value = c4int32<C, c1, c2, c3, 0>::value;
77};
78
79template <CharBase C, unsigned char c1, unsigned char c2,
80                      unsigned char c3, unsigned char c4,
81                      unsigned char c5>
82struct c5int64 {
83  static uint64_t const value = c8int64<C, c1, c2, c3, c4, c5, 0, 0, 0>::value;
84};
85
86template <CharBase C, unsigned char c1, unsigned char c2,
87                      unsigned char c3, unsigned char c4,
88                      unsigned char c5, unsigned char c6>
89struct c6int64 {
90  static uint64_t const value = c8int64<C, c1, c2, c3, c4, c5, c6, 0, 0>::value;
91};
92
93template <CharBase C, unsigned char c1, unsigned char c2,
94                      unsigned char c3, unsigned char c4,
95                      unsigned char c5, unsigned char c6,
96                      unsigned char c7>
97struct c7int64 {
98  static uint64_t const value = c8int64<C, c1, c2, c3, c4, c5, c6, c7, 0>::value;
99};
100
101
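/*
A second set of helper functions determines 16, 32, or 64 bit integer
values from character arrays.
Precondition:  the character array must have at least as many readable
bytes as the function loads: 2 for s2int16, 4 for s4int32 and s3int32,
and 8 for s8int64, s5int64, s6int64 and s7int64.  The 3, 5, 6 and 7
character variants load a full 4 or 8 byte word and then mask off the
bytes beyond the sequence length. */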
/*  Load the first two bytes of s as a 16-bit integer in the machine's
    native byte order.  s must point to at least 2 readable bytes.
    The bytes are copied with memcpy instead of being read through a
    cast pointer, so s needs no particular alignment and the load does
    not violate the strict-aliasing rule.  */
static inline uint16_t s2int16(unsigned char s[]) {
  uint16_t word;
  memcpy(&word, s, sizeof word);
  return word;
}
110
/*  Load the first four bytes of s as a 32-bit integer in the machine's
    native byte order.  s must point to at least 4 readable bytes.
    The bytes are copied with memcpy instead of being read through a
    cast pointer, so s needs no particular alignment and the load does
    not violate the strict-aliasing rule.  */
static inline uint32_t s4int32(unsigned char s[]) {
  uint32_t word;
  memcpy(&word, s, sizeof word);
  return word;
}
114
/*  Load the first eight bytes of s as a 64-bit integer in the machine's
    native byte order.  s must point to at least 8 readable bytes.
    The bytes are copied with memcpy instead of being read through a
    cast pointer, so s needs no particular alignment and the load does
    not violate the strict-aliasing rule.  */
static inline uint64_t s8int64(unsigned char s[]) {
  uint64_t word;
  memcpy(&word, s, sizeof word);
  return word;
}
118
119static inline uint32_t s3int32(unsigned char s[]) {
120  return s4int32(s) & (0xFFFFFF << LOW_BYTE_SHIFT);
121}
122
123static inline uint64_t s5int64(unsigned char s[]) {
124  return s8int64(s) & (0xFFFFFFFFFFULL << (3 * LOW_BYTE_SHIFT));
125}
126
127static inline uint64_t s6int64(unsigned char s[]) {
128  return s8int64(s) & (0xFFFFFFFFFFFFULL << (2 * LOW_BYTE_SHIFT));
129}
130
131static inline uint64_t s7int64(unsigned char s[]) {
132  return s8int64(s) & (0xFFFFFFFFFFFFFFULL << LOW_BYTE_SHIFT);
133}
134
135#endif
Note: See TracBrowser for help on using the repository browser.