source: trunk/src/multiliteral.h @ 4

Last change on this file since 4 was 4, checked in by cameron, 11 years ago

Initial import of parabix-0.36

File size: 3.2 KB
Line 
1/*  multiliteral.h - XML Multicharacter Recognizers.
2    Copyright (c) 2007, Robert D. Cameron. 
3    Licensed to the public under the Open Software License 3.0.
4    Licensed to International Characters, Inc., under the Academic
5    Free License 3.0.
6
7This file provides a library of routines for the efficient recognition
8of particular XML multicharacter sequences.  Sequences of length 2 are
9compared as 16 bit integers, sequences of length 3 or 4 are compared
10as 32 bit integers and other sequences of length up to 8 are compared as
1164 bit integers.
12
13All functions are declared inline; there is no corresponding multiliteral.c
14file required.   */
15
16#ifndef MULTILITERAL_H
17#define MULTILITERAL_H
18
#include <assert.h>
#include <stdint.h>
#include <string.h>
21
22/*
23Helper functions.  Given 2, 4 or 8 characters comprising a sequence,
24the c2int16, c4int32, and c8int64 functions determine the corresponding
2516, 32 or 64 bit integer value.   These functions are intended to be called
26with constant arguments and hence to be applied at compile time.
27*/
28
/*
 * Pack two characters into the 16-bit integer whose in-memory byte
 * sequence is c1 followed by c2, i.e. the value obtained by loading
 * those two bytes from memory as a native uint16_t.
 *
 * The value is built by copying the bytes into the integer instead of
 * selecting a shift order with #if BYTE_ORDER == ...: those macros are
 * not standard C, and when they are undefined the preprocessor compares
 * 0 == 0, so the big-endian formula would be chosen on every host.
 * Copying the bytes gives the right answer for any byte order.
 */
static inline uint16_t c2int16(unsigned char c1, unsigned char c2) {
  const unsigned char bytes[2] = { c1, c2 };
  uint16_t value;
  memcpy(&value, bytes, sizeof value);
  return value;
}
37
/*
 * Pack four characters into the 32-bit integer whose in-memory byte
 * sequence is c1, c2, c3, c4, i.e. the value obtained by loading those
 * four bytes from memory as a native uint32_t.
 *
 * The bytes are copied into the integer rather than combined with
 * byte-order-dependent shifts, so the result is correct on any host
 * without relying on the non-standard BYTE_ORDER macros.
 */
static inline uint32_t c4int32(unsigned char c1, unsigned char c2,
                               unsigned char c3, unsigned char c4) {
  const unsigned char bytes[4] = { c1, c2, c3, c4 };
  uint32_t value;
  memcpy(&value, bytes, sizeof value);
  return value;
}
47
/*
 * Pack eight characters into the 64-bit integer whose in-memory byte
 * sequence is c1 .. c8, i.e. the value obtained by loading those eight
 * bytes from memory as a native uint64_t.
 *
 * The bytes are copied into the integer rather than combined with
 * byte-order-dependent shifts.  This removes the dependence on the
 * non-standard BYTE_ORDER macros and the precedence trap of writing
 * "hi << 32 + lo", which parses as "hi << (32 + lo)".
 */
static inline uint64_t c8int64(unsigned char c1, unsigned char c2,
                               unsigned char c3, unsigned char c4,
                               unsigned char c5, unsigned char c6,
                               unsigned char c7, unsigned char c8) {
  const unsigned char bytes[8] = { c1, c2, c3, c4, c5, c6, c7, c8 };
  uint64_t value;
  memcpy(&value, bytes, sizeof value);
  return value;
}
61
62/*  Specialized helpers for 3 and 5 character combinations. */
63
/*
 * Three-character form of c4int32: the missing fourth character is
 * supplied as zero and the packing is delegated to c4int32.
 */
static inline uint32_t c3int32(unsigned char c1, unsigned char c2,
                               unsigned char c3) {
  const unsigned char pad = 0;
  const uint32_t packed = c4int32(c1, c2, c3, pad);
  return packed;
}
68
/*
 * Five-character form of c8int64: the three missing trailing characters
 * are supplied as zero and the packing is delegated to c8int64.
 */
static inline uint64_t c5int64(unsigned char c1, unsigned char c2,
                               unsigned char c3, unsigned char c4,
                               unsigned char c5) {
  const unsigned char pad = 0;
  const uint64_t packed = c8int64(c1, c2, c3, c4, c5, pad, pad, pad);
  return packed;
}
74
75/*
76A second set of helper functions determines 16, 32, or 64 bit integer
77values from character arrays.
78Precondition:  the character array is allocated with at least the
79number of required characters in each case. */
/*
 * Load the first two characters of s as a native 16-bit integer.
 *
 * Precondition: s points to at least 2 readable bytes.
 *
 * The bytes are copied with memcpy instead of dereferencing a cast
 * pointer: reading an unsigned char array through a uint16_t lvalue
 * breaks the aliasing rules and may be a misaligned access, since s
 * can point anywhere in a text buffer.
 */
static inline uint16_t s2int16(const unsigned char s[]) {
  uint16_t value;
  memcpy(&value, s, sizeof value);
  return value;
}
83
/*
 * Load the first four characters of s as a native 32-bit integer.
 *
 * Precondition: s points to at least 4 readable bytes.
 *
 * The bytes are copied with memcpy instead of dereferencing a cast
 * pointer: reading an unsigned char array through a uint32_t lvalue
 * breaks the aliasing rules and may be a misaligned access, since s
 * can point anywhere in a text buffer.
 */
static inline uint32_t s4int32(const unsigned char s[]) {
  uint32_t value;
  memcpy(&value, s, sizeof value);
  return value;
}
87
/*
 * Load the first eight characters of s as a native 64-bit integer.
 *
 * Precondition: s points to at least 8 readable bytes.
 *
 * The bytes are copied with memcpy instead of dereferencing a cast
 * pointer: reading an unsigned char array through a uint64_t lvalue
 * breaks the aliasing rules and may be a misaligned access, since s
 * can point anywhere in a text buffer.
 */
static inline uint64_t s8int64(const unsigned char s[]) {
  uint64_t value;
  memcpy(&value, s, sizeof value);
  return value;
}
91
/*
 * Load the first three characters of s into a 32-bit integer whose
 * in-memory byte sequence is s[0], s[1], s[2], 0.  This is the same
 * layout as a 4-byte load of s with the fourth byte masked to zero.
 *
 * Precondition: s points to at least 3 readable bytes.
 *
 * Exactly three bytes are copied into a zeroed word, so the function
 * never touches a fourth byte that may lie past the end of the buffer,
 * and it does not depend on alignment or on the host byte order.
 */
static inline uint32_t s3int32(const unsigned char s[]) {
  uint32_t value = 0;
  memcpy(&value, s, 3);
  return value;
}
95
/*
 * Load the first five characters of s into a 64-bit integer whose
 * in-memory byte sequence is s[0] .. s[4] followed by three zero bytes.
 * This is the same layout as an 8-byte load of s with the last three
 * bytes masked to zero.
 *
 * Precondition: s points to at least 5 readable bytes.
 *
 * Exactly five bytes are copied into a zeroed word, so the function
 * never touches bytes 5..7 that may lie past the end of the buffer,
 * and it does not depend on alignment or on the host byte order.
 */
static inline uint64_t s5int64(const unsigned char s[]) {
  uint64_t value = 0;
  memcpy(&value, s, 5);
  return value;
}
99
100#endif
Note: See TracBrowser for help on using the repository browser.