source: trunk/src/bitlex.c @ 7

Last change on this file since 7 was 7, checked in by cameron, 12 years ago

Reference extraction; checking ]]> in text; < in atts.

File size: 4.4 KB
Line 
1/*  bitlex - Parabix lexical analysis common routines.
2    Copyright (c) 2007, Robert D. Cameron.
3    Licensed to the public under the Open Software License 3.0.
4    Licensed to International Characters, Inc., under the Academic
5    Free License 3.0.
6
7    These are common routines for all ASCII-family character sets.
8    They are used by the character-set specific Lexer objects
9    found in the charsets directory.
10*/
11#include "bitlex.h"
12
13#include "transpose.h"
14
15Lexer::Lexer(XML_Buffer *b, ParallelStreamSet *p) {
16  xml_buf = b;
17  parsing_engine_data = p;
18};
19
20/* Given the bit[] array of one BitBlock each for the 8 bits of
21   an ASCII-family character representation, compute the parallel
22   lexical item streams needed for XML parsing.
23
     Inputs:  bit[0..7] - the eight bit streams; they are only read.
     Outputs: LexItem[] - the entries RAngle, RBracket, Hyphen, QMark,
                          SQuote, DQuote, MarkupStart, NonWS and
                          NameFollow are written.  No other entry is
                          assigned by this function.

     NOTE(review): the character codes quoted in the comments below
     assume that bit[0] is the most significant bit of each byte and
     that simd_andc(a, b) computes a AND NOT b - confirm against the
     transposition and SIMD helper headers.  Under that reading
     bit[0..3] select the high nibble and bit[4..7] the low nibble.

24   WARNING: the following is generated code by charset_compiler.py.
25   Do not edit.
26
27*/
28static inline void ComputeLexicalItemBlocks(BitBlock bit[], BitBlock LexItem[]) {
  /* temp3: high nibble 0010 (0x2_).  temp6: low nibble 0110. */
29  BitBlock temp1 = simd_or(bit[0], bit[1]);
30  BitBlock temp2 = simd_andc(bit[2], bit[3]);
31  BitBlock temp3 = simd_andc(temp2, temp1);
32  BitBlock temp4 = simd_andc(bit[5], bit[4]);
33  BitBlock temp5 = simd_andc(bit[6], bit[7]);
34  BitBlock temp6 = simd_and(temp4, temp5);
  /* RefStart: 0x26, the ampersand. */
35  BitBlock RefStart = simd_and(temp3, temp6);
  /* temp8: high nibble 0011 (0x3_).  temp11: low nibble 1011. */
36  BitBlock temp7 = simd_and(bit[2], bit[3]);
37  BitBlock temp8 = simd_andc(temp7, temp1);
38  BitBlock temp9 = simd_andc(bit[4], bit[5]);
39  BitBlock temp10 = simd_and(bit[6], bit[7]);
40  BitBlock temp11 = simd_and(temp9, temp10);
  /* Semicolon: 0x3B. */
41  BitBlock Semicolon = simd_and(temp8, temp11);
  /* temp14: low nibble 1100, so LAngle is 0x3C, the less-than sign. */
42  BitBlock temp12 = simd_and(bit[4], bit[5]);
43  BitBlock temp13 = simd_or(bit[6], bit[7]);
44  BitBlock temp14 = simd_andc(temp12, temp13);
45  BitBlock LAngle = simd_and(temp8, temp14);
  /* temp15: low nibble 1110, so RAngle is 0x3E, the greater-than sign. */
46  BitBlock temp15 = simd_and(temp12, temp5);
47  LexItem[RAngle] = simd_and(temp8, temp15);
  /* temp18: high nibble 0101 (0x5_).  temp20: low nibble 1101. */
48  BitBlock temp16 = simd_andc(bit[1], bit[0]);
49  BitBlock temp17 = simd_andc(bit[3], bit[2]);
50  BitBlock temp18 = simd_and(temp16, temp17);
51  BitBlock temp19 = simd_andc(bit[7], bit[6]);
52  BitBlock temp20 = simd_and(temp12, temp19);
  /* RBracket: 0x5D.  Hyphen: 0x2D. */
53  LexItem[RBracket] = simd_and(temp18, temp20);
54  LexItem[Hyphen] = simd_and(temp3, temp20);
  /* temp21: low nibble 1111, so QMark is 0x3F. */
55  BitBlock temp21 = simd_and(temp12, temp10);
56  LexItem[QMark] = simd_and(temp8, temp21);
  /* Equals: 0x3D. */
57  BitBlock Equals = simd_and(temp8, temp20);
  /* temp22: low nibble 0111, so SQuote starts out as 0x27 (apostrophe). */
58  BitBlock temp22 = simd_and(temp4, temp10);
59  LexItem[SQuote] = simd_and(temp3, temp22);
  /* temp24: low nibble 0010, so DQuote starts out as 0x22 (quotation mark). */
60  BitBlock temp23 = simd_or(bit[4], bit[5]);
61  BitBlock temp24 = simd_andc(temp5, temp23);
62  LexItem[DQuote] = simd_and(temp3, temp24);
  /* Control: bytes whose bit[0], bit[1] and bit[2] are all clear, i.e.
     0x00-0x1F (presumably simd_const_1(1) is the all-ones block - TODO
     confirm).  The value is computed but not used later in this
     function. */
63  BitBlock temp25 = simd_or(temp1, bit[2]);
64  BitBlock Control = simd_andc(simd_const_1(1), temp25);
  /* WhiteSpace is the union of four single-character matches:
     temp27 = 0x20, temp30 = 0x0D, temp33 = 0x09, temp36 = 0x0A. */
65  BitBlock temp26 = simd_or(temp23, temp13);
66  BitBlock temp27 = simd_andc(temp3, temp26);
67  BitBlock temp28 = simd_or(bit[2], bit[3]);
68  BitBlock temp29 = simd_or(temp1, temp28);
69  BitBlock temp30 = simd_andc(temp20, temp29);
70  BitBlock temp31 = simd_or(temp27, temp30);
71  BitBlock temp32 = simd_and(temp9, temp19);
72  BitBlock temp33 = simd_andc(temp32, temp29);
73  BitBlock temp34 = simd_or(temp31, temp33);
74  BitBlock temp35 = simd_and(temp9, temp5);
75  BitBlock temp36 = simd_andc(temp35, temp29);
76  BitBlock WhiteSpace = simd_or(temp34, temp36);
  /* Slash: 0x2F. */
77  BitBlock Slash = simd_and(temp3, temp21);
  /* AttScan marks LAngle or RefStart positions; it is ORed into both
     quote streams, so after these lines SQuote and DQuote also mark
     those two characters. */
78  BitBlock AttScan = simd_or(LAngle, RefStart);
79  LexItem[SQuote] = simd_or(LexItem[SQuote], AttScan);
80  LexItem[DQuote] = simd_or(LexItem[DQuote], AttScan);
  /* MarkupStart: LAngle, RefStart or RBracket. */
81  LexItem[MarkupStart] = simd_or(simd_or(LAngle, RefStart), LexItem[RBracket]);
  /* NonWS: complement of WhiteSpace. */
82  LexItem[NonWS] = simd_not(WhiteSpace);
  /* NameFollow: WhiteSpace, Semicolon, Slash, RAngle, Equals or QMark. */
83  LexItem[NameFollow] = simd_or(simd_or(simd_or(WhiteSpace, Semicolon), 
84                                        simd_or(Slash, LexItem[RAngle])),
85                                simd_or(Equals, LexItem[QMark]));
86}
87
88
89/* A temporary structure for internal use in ComputeLexicalItemStreams. */
90typedef struct {
91  BitBlock bit[8];
92  BitBlock LexicalItems[LexicalItemCount];
93} LexicalItemBlock;
94
95
96void Lexer::ComputeLexicalItemStreams(int new_blocks) {
97  LexicalItemBlock lx_blk[BUFFER_BLOCKS];
98  for (int i = 0; i < new_blocks; i++) {
99    s2p_bytepack(&(parsing_engine_data->x8data[i * 8]), lx_blk[i].bit);
100    ComputeLexicalItemBlocks(lx_blk[i].bit, lx_blk[i].LexicalItems);
101  }
102#ifdef BUFFER_PROFILING
103  end_BOM_interval(bitstream_timer);
104  start_BOM_interval(lextranspose_timer);
105#endif
106  for (int j = MarkupStart; j < LexicalItemCount; j++) {
107    for (int i = 0; i < BUFFER_BLOCKS; i++) {
108      parsing_engine_data->item_stream[j][i] = lx_blk[i].LexicalItems[j];
109    }
110  }
111#ifdef BUFFER_PROFILING
112  end_BOM_interval(lextranspose_timer);
113  start_BOM_interval(scanner_timer);
114#endif
115}
Note: See TracBrowser for help on using the repository browser.