source: trunk/src/multiliteral.h @ 25

Last change on this file since 25 was 25, checked in by cameron, 12 years ago

Update for xmlparam.h

File size: 4.6 KB
Line 
1/*  multiliteral.h - XML Multicharacter Recognizers.
2    Copyright (c) 2007, 2008, Robert D. Cameron. 
3    Licensed to the public under the Open Software License 3.0.
4    Licensed to International Characters, Inc., under the Academic
5    Free License 3.0.
6
7This file provides a library of routines for the efficient recognition
8of particular XML multicharacter sequences.  Sequences of length 2 are
9compared as 16 bit integers, sequences of length 3 or 4 are compared
10as 32 bit integers and other sequences of length up to 8 are compared as
1164 bit integers.  The integer value for each XML multicharacter sequence
12is determined as a compile-time constant for optimal efficiency.
13
14All functions are declared inline; there is no corresponding multiliteral.c
15file required.   */
16
17#ifndef MULTILITERAL_H
18#define MULTILITERAL_H
19
#include <assert.h>
#include <stdint.h>
#include <string.h>
#include "xmlparam.h"
#include "charsets/ASCII_EBCDIC.h"
24
/* Shift amounts that place a character in the byte of a 16-bit integer
   that comes first (LOW_BYTE_SHIFT) or second (HIGH_BYTE_SHIFT) in memory.

   The byte order is taken from BYTE_ORDER when it is defined (presumably
   via xmlparam.h or a system header), otherwise from the compiler's
   predefined __BYTE_ORDER__.  A single #if/#elif chain is used so that
   exactly one pair of constants is defined: with two independent #if
   tests, an undefined BYTE_ORDER makes both comparisons read 0 == 0 and
   both pairs get defined.  Any other byte order is rejected outright. */
#if (defined(BYTE_ORDER) && defined(BIG_ENDIAN) && \
     BYTE_ORDER == BIG_ENDIAN) || \
    (!defined(BYTE_ORDER) && defined(__BYTE_ORDER__) && \
     defined(__ORDER_BIG_ENDIAN__) && \
     __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__)
const int LOW_BYTE_SHIFT = 8;
const int HIGH_BYTE_SHIFT = 0;
#elif (defined(BYTE_ORDER) && defined(LITTLE_ENDIAN) && \
       BYTE_ORDER == LITTLE_ENDIAN) || \
      (!defined(BYTE_ORDER) && defined(__BYTE_ORDER__) && \
       defined(__ORDER_LITTLE_ENDIAN__) && \
       __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__)
const int LOW_BYTE_SHIFT = 0;
const int HIGH_BYTE_SHIFT = 8;
#else
#error "multiliteral.h: BYTE_ORDER must be defined as BIG_ENDIAN or LITTLE_ENDIAN"
#endif
33
34/*
35Helper metafunctions.  Given 2, 4 or 8 characters comprising a sequence,
36the c2int16, c4int32, and c8int64 functions determine the corresponding
3716, 32 or 64 bit integer value.   These are template metafunctions that
38must be instantiated with constant arguments to be applied at compile time.
39The functions may be instantiated for ASCII or EBCDIC based byte
40sequences.
41For example, c2int16<ASCII, '<', '/'>::value produces the compile
42time constant for the 16-bit value of an ASCII-based byte sequence
43of the XML end tag opening delimiter.
44*/
45
46template <CodeUnit_Base C, unsigned char c1, unsigned char c2>
47struct c2int16 {
48  static uint16_t const value =
49    (((uint16_t) Ord<C,c1>::value) << LOW_BYTE_SHIFT) +
50    (((uint16_t) Ord<C,c2>::value) << HIGH_BYTE_SHIFT);
51};
52
53template <CodeUnit_Base C, unsigned char c1, unsigned char c2,
54                      unsigned char c3, unsigned char c4>
55struct c4int32 {
56  static uint32_t const value =
57    (((uint32_t) c2int16<C,c1,c2>::value) << (2 * LOW_BYTE_SHIFT)) + 
58    (((uint32_t) c2int16<C,c3,c4>::value) << (2 * HIGH_BYTE_SHIFT));
59};
60
61template <CodeUnit_Base C, unsigned char c1, unsigned char c2,
62                      unsigned char c3, unsigned char c4,
63                      unsigned char c5, unsigned char c6,
64                      unsigned char c7, unsigned char c8>
65struct c8int64 {
66  static uint64_t const value =
67    (((uint64_t) c4int32<C, c1, c2, c3, c4>::value) << (4 * LOW_BYTE_SHIFT)) + 
68    (((uint64_t) c4int32<C, c5, c6, c7, c8>::value) << (4 * HIGH_BYTE_SHIFT));
69};
70
71
72/*  Specialized helpers for 3, 5, 6, and 7 character combinations. */
73
74template <CodeUnit_Base C, unsigned char c1, unsigned char c2,
75                      unsigned char c3>
76struct c3int32 {
77  static uint32_t const value = c4int32<C, c1, c2, c3, 0>::value;
78};
79
80template <CodeUnit_Base C, unsigned char c1, unsigned char c2,
81                      unsigned char c3, unsigned char c4,
82                      unsigned char c5>
83struct c5int64 {
84  static uint64_t const value = c8int64<C, c1, c2, c3, c4, c5, 0, 0, 0>::value;
85};
86
87template <CodeUnit_Base C, unsigned char c1, unsigned char c2,
88                      unsigned char c3, unsigned char c4,
89                      unsigned char c5, unsigned char c6>
90struct c6int64 {
91  static uint64_t const value = c8int64<C, c1, c2, c3, c4, c5, c6, 0, 0>::value;
92};
93
94template <CodeUnit_Base C, unsigned char c1, unsigned char c2,
95                      unsigned char c3, unsigned char c4,
96                      unsigned char c5, unsigned char c6,
97                      unsigned char c7>
98struct c7int64 {
99  static uint64_t const value = c8int64<C, c1, c2, c3, c4, c5, c6, c7, 0>::value;
100};
101
102
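/*
A second set of helper functions determines 16, 32, or 64 bit integer
values from character arrays.
Precondition:  the character array must provide at least as many readable
bytes as the width of the integer that is loaded, which can exceed the
number of characters compared: 2 bytes for s2int16, 4 bytes for s3int32
and s4int32, and 8 bytes for s5int64, s6int64, s7int64 and s8int64. */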
/* Load the first two bytes of s as a 16-bit integer in native byte order.
   The bytes are copied with memcpy instead of dereferencing a cast
   pointer, so the load is well defined for any alignment of s and does
   not break the aliasing rules; optimizing compilers typically turn a
   fixed-size memcpy like this into a single load.
   Precondition: s points to at least 2 readable bytes. */
static inline uint16_t s2int16(const unsigned char s[]) {
  uint16_t v;
  memcpy(&v, s, sizeof v);
  return v;
}
111
/* Load the first four bytes of s as a 32-bit integer in native byte order.
   The bytes are copied with memcpy instead of dereferencing a cast
   pointer, so the load is well defined for any alignment of s and does
   not break the aliasing rules; optimizing compilers typically turn a
   fixed-size memcpy like this into a single load.
   Precondition: s points to at least 4 readable bytes. */
static inline uint32_t s4int32(const unsigned char s[]) {
  uint32_t v;
  memcpy(&v, s, sizeof v);
  return v;
}
115
/* Load the first eight bytes of s as a 64-bit integer in native byte order.
   The bytes are copied with memcpy instead of dereferencing a cast
   pointer, so the load is well defined for any alignment of s and does
   not break the aliasing rules; optimizing compilers typically turn a
   fixed-size memcpy like this into a single load.
   Precondition: s points to at least 8 readable bytes. */
static inline uint64_t s8int64(const unsigned char s[]) {
  uint64_t v;
  memcpy(&v, s, sizeof v);
  return v;
}
119
120static inline uint32_t s3int32(unsigned char s[]) {
121  return s4int32(s) & (0xFFFFFF << LOW_BYTE_SHIFT);
122}
123
124static inline uint64_t s5int64(unsigned char s[]) {
125  return s8int64(s) & (0xFFFFFFFFFFULL << (3 * LOW_BYTE_SHIFT));
126}
127
128static inline uint64_t s6int64(unsigned char s[]) {
129  return s8int64(s) & (0xFFFFFFFFFFFFULL << (2 * LOW_BYTE_SHIFT));
130}
131
132static inline uint64_t s7int64(unsigned char s[]) {
133  return s8int64(s) & (0xFFFFFFFFFFFFFFULL << LOW_BYTE_SHIFT);
134}
135
136#endif
Note: See TracBrowser for help on using the repository browser.