source: trunk/src/multiliteral.h @ 14

Last change on this file since 14 was 11, checked in by cameron, 11 years ago

Reading XML declaration: version/encoding.

File size: 4.0 KB
RevLine 
[4]1/*  multiliteral.h - XML Multicharacter Recognizers.
2    Copyright (c) 2007, Robert D. Cameron. 
3    Licensed to the public under the Open Software License 3.0.
4    Licensed to International Characters, Inc., under the Academic
5    Free License 3.0.
6
7This file provides a library of routines for the efficient recognition
8of particular XML multicharacter sequences.  Sequences of length 2 are
9compared as 16 bit integers, sequences of length 3 or 4 are compared
10as 32 bit integers and other sequences of length up to 8 are compared as
1164 bit integers.
12
13All functions are declared inline; there is no corresponding multiliteral.c
14file required.   */
15
16#ifndef MULTILITERAL_H
17#define MULTILITERAL_H
18
#include <assert.h>
#include <stdint.h>
#include <string.h>
21
22/*
23Helper functions.  Given 2, 4 or 8 characters comprising a sequence,
24the c2int16, c4int32, and c8int64 functions determine the corresponding
2516, 32 or 64 bit integer value.   These functions are intended to be called
26with constant arguments and hence to be applied at compile time.
27*/
28
/* Pack two characters into the 16-bit integer whose in-memory byte image
   is c1 followed by c2, i.e. the value obtained by loading a uint16_t
   from a buffer holding those two characters.

   The value is built by copying the bytes rather than by shifting under
   BYTE_ORDER tests, so the result follows the host byte order without
   depending on endianness macros that this header does not itself define.
   With constant arguments an optimizing compiler can typically still
   reduce the call to a constant. */
static inline uint16_t c2int16(unsigned char c1, unsigned char c2) {
  const unsigned char bytes[sizeof(uint16_t)] = {c1, c2};
  uint16_t value;
  memcpy(&value, bytes, sizeof value);
  return value;
}
37
/* Pack four characters into the 32-bit integer whose in-memory byte image
   is c1, c2, c3, c4 in that order, i.e. the value obtained by loading a
   uint32_t from a buffer holding those four characters.

   The value is built by copying the bytes, so it follows the host byte
   order without relying on BYTE_ORDER / BIG_ENDIAN / LITTLE_ENDIAN being
   defined by some earlier header. */
static inline uint32_t c4int32(unsigned char c1, unsigned char c2,
                               unsigned char c3, unsigned char c4) {
  const unsigned char bytes[sizeof(uint32_t)] = {c1, c2, c3, c4};
  uint32_t value;
  memcpy(&value, bytes, sizeof value);
  return value;
}
47
/* Pack eight characters into the 64-bit integer whose in-memory byte image
   is c1 .. c8 in that order, i.e. the value obtained by loading a uint64_t
   from a buffer holding those eight characters.

   The value is built by copying the bytes, so it follows the host byte
   order on its own.  This avoids both the dependence on externally defined
   endianness macros and the shift/add precedence pitfall of composing the
   result as "high << 32 + low" (which parses as high << (32 + low)). */
static inline uint64_t c8int64(unsigned char c1, unsigned char c2,
                               unsigned char c3, unsigned char c4,
                               unsigned char c5, unsigned char c6,
                               unsigned char c7, unsigned char c8) {
  const unsigned char bytes[sizeof(uint64_t)] = {c1, c2, c3, c4,
                                                 c5, c6, c7, c8};
  uint64_t value;
  memcpy(&value, bytes, sizeof value);
  return value;
}
61
[11]62/*  Specialized helpers for 3, 5, 6, and 7 character combinations. */
[4]63
/* Three-character form of c4int32: the fourth character position is
   filled with a zero byte. */
static inline uint32_t c3int32(unsigned char c1, unsigned char c2,
                               unsigned char c3) {
  const unsigned char pad = 0;  /* filler for the unused fourth position */
  const uint32_t packed = c4int32(c1, c2, c3, pad);
  return packed;
}
68
/* Five-character form of c8int64: character positions six through eight
   are filled with zero bytes. */
static inline uint64_t c5int64(unsigned char c1, unsigned char c2,
                               unsigned char c3, unsigned char c4,
                               unsigned char c5) {
  const unsigned char pad = 0;  /* filler for the three unused positions */
  const uint64_t packed = c8int64(c1, c2, c3, c4, c5, pad, pad, pad);
  return packed;
}
74
/* Six-character form of c8int64: character positions seven and eight
   are filled with zero bytes. */
static inline uint64_t c6int64(unsigned char c1, unsigned char c2,
                               unsigned char c3, unsigned char c4,
                               unsigned char c5, unsigned char c6) {
  const unsigned char pad = 0;  /* filler for the two unused positions */
  const uint64_t packed = c8int64(c1, c2, c3, c4, c5, c6, pad, pad);
  return packed;
}
80
/* Seven-character form of c8int64: the eighth character position is
   filled with a zero byte. */
static inline uint64_t c7int64(unsigned char c1, unsigned char c2,
                               unsigned char c3, unsigned char c4,
                               unsigned char c5, unsigned char c6,
                               unsigned char c7) {
  const unsigned char pad = 0;  /* filler for the unused eighth position */
  const uint64_t packed = c8int64(c1, c2, c3, c4, c5, c6, c7, pad);
  return packed;
}
87
[4]88/*
89A second set of helper functions determines 16, 32, or 64 bit integer
90values from character arrays.
91Precondition:  the character array is allocated with at least the
92number of required characters in each case. */
/* Load the first two characters of s as a 16-bit integer in host byte
   order.  s must point to at least 2 readable bytes.

   The bytes are copied with memcpy rather than read through a cast
   pointer, so the load is well defined for any alignment of s and does
   not violate the aliasing rules. */
static inline uint16_t s2int16(const unsigned char s[]) {
  uint16_t value;
  memcpy(&value, s, sizeof value);
  return value;
}
96
/* Load the first four characters of s as a 32-bit integer in host byte
   order.  s must point to at least 4 readable bytes.

   The bytes are copied with memcpy rather than read through a cast
   pointer, so the load is well defined for any alignment of s and does
   not violate the aliasing rules. */
static inline uint32_t s4int32(const unsigned char s[]) {
  uint32_t value;
  memcpy(&value, s, sizeof value);
  return value;
}
100
/* Load the first eight characters of s as a 64-bit integer in host byte
   order.  s must point to at least 8 readable bytes.

   The bytes are copied with memcpy rather than read through a cast
   pointer, so the load is well defined for any alignment of s and does
   not violate the aliasing rules. */
static inline uint64_t s8int64(const unsigned char s[]) {
  uint64_t value;
  memcpy(&value, s, sizeof value);
  return value;
}
104
/* Load the first three characters of s as a 32-bit integer in host byte
   order, with the fourth byte position zero.  s must point to at least
   3 readable bytes.

   Exactly three bytes are copied into a zero-initialised integer.  This
   yields the same value as loading four bytes and masking off the last
   one, but never touches s[3], so an exactly-sized buffer is not
   over-read, and the access is well defined for any alignment of s. */
static inline uint32_t s3int32(const unsigned char s[]) {
  uint32_t value = 0;
  memcpy(&value, s, 3);
  return value;
}
108
/* Load the first five characters of s as a 64-bit integer in host byte
   order, with byte positions six through eight zero.  s must point to at
   least 5 readable bytes.

   Exactly five bytes are copied into a zero-initialised integer.  This
   yields the same value as loading eight bytes and masking off the last
   three, but never reads past s[4], and the access is well defined for
   any alignment of s. */
static inline uint64_t s5int64(const unsigned char s[]) {
  uint64_t value = 0;
  memcpy(&value, s, 5);
  return value;
}
112
/* Load the first six characters of s as a 64-bit integer in host byte
   order, with byte positions seven and eight zero.  s must point to at
   least 6 readable bytes.

   Exactly six bytes are copied into a zero-initialised integer.  This
   yields the same value as loading eight bytes and masking off the last
   two, but never reads past s[5], and the access is well defined for
   any alignment of s. */
static inline uint64_t s6int64(const unsigned char s[]) {
  uint64_t value = 0;
  memcpy(&value, s, 6);
  return value;
}
116
/* Load the first seven characters of s as a 64-bit integer in host byte
   order, with the eighth byte position zero.  s must point to at least
   7 readable bytes.

   Exactly seven bytes are copied into a zero-initialised integer.  This
   yields the same value as loading eight bytes and masking off the last
   one, but never reads s[7], and the access is well defined for any
   alignment of s. */
static inline uint64_t s7int64(const unsigned char s[]) {
  uint64_t value = 0;
  memcpy(&value, s, 7);
  return value;
}
120
[4]121#endif
Note: See TracBrowser for help on using the repository browser.