source: proto/SymbolTable/dictionary_pbgs_id.py @ 1684

Last change on this file since 1684 was 1666, checked in by vla24, 8 years ago

SymbolTable: Added natural-language dictionary templates that use the symbol table

File size: 5.3 KB
Line 
1# -*- coding: utf-8 -*-
2#
3# parabix2_compilable.py
4#
5# Parallel XML Parsing with Bitstream Addition
6#
7# - Complete prototype for all bitstream computations in Parabix2
8# - Optimized for compilation
9# - Separate compilation
10
11# Robert D. Cameron
12# July 29, 2010
13#
14
15#import bitutil
16
class Basis_bits:
    # Holder for the eight basis-bit values, one field per bit index
    # (bit_0 .. bit_7).  Every field defaults to 0.
    #
    # NOTE(review): the character-class tests in Classify_bytes select the
    # ASCII letters and digits only if bit_0 is the most significant bit of
    # each byte and bit_7 the least significant -- confirm against the code
    # that fills these fields in.
    bit_0 = 0
    bit_1 = 0
    bit_2 = 0
    bit_3 = 0
    bit_4 = 0
    bit_5 = 0
    bit_6 = 0
    bit_7 = 0
26
class Dictionary:
    # Result fields for word detection.  Every field defaults to 0.

    # Written by Classify_bytes: the word-character stream and the two
    # boundary streams derived from it.
    Words = 0
    Word_starts = 0
    Word_ends = 0

    # Written by Form_Length_Group_Bitstreams: the entries of Word_ends
    # split up by word length, one field per length from 1 to 16.
    Word_ends_1 = 0
    Word_ends_2 = 0
    Word_ends_3 = 0
    Word_ends_4 = 0
    Word_ends_5 = 0
    Word_ends_6 = 0
    Word_ends_7 = 0
    Word_ends_8 = 0
    Word_ends_9 = 0
    Word_ends_10 = 0
    Word_ends_11 = 0
    Word_ends_12 = 0
    Word_ends_13 = 0
    Word_ends_14 = 0
    Word_ends_15 = 0
    Word_ends_16 = 0

    # Whatever is left of Word_ends after the sixteen groups above.
    Word_ends_17_and_longer = 0
48
class Hash_data:
    # Holds the value written by Compute_Hash_Value_Bitstream; defaults to 0.
    Hash_value = 0
51
def Classify_bytes(basis_bits, dictionary):
    # Fill in dictionary.Words, dictionary.Word_starts and
    # dictionary.Word_ends from the eight basis bits.  Returns nothing.
    #
    # NOTE(review): the range comments below assume bit_0 is the most
    # significant bit of each byte and that the input is ASCII; under that
    # reading Words selects exactly [A-Za-z0-9].  Confirm against the caller.
    b0 = basis_bits.bit_0
    b1 = basis_bits.bit_1
    b2 = basis_bits.bit_2
    b3 = basis_bits.bit_3
    b4 = basis_bits.bit_4
    b5 = basis_bits.bit_5
    b6 = basis_bits.bit_6
    b7 = basis_bits.bit_7

    # Tests on bits 4-7 only.
    low_is_b_to_f = b4 & (b5 | (b6 & b7))
    low_is_nonzero = (b4 | b5) | (b6 | b7)
    low_is_a_to_f = b4 & (b5 | b6)

    # Bytes with bit_0 clear and bit_1 set.  bit_3 chooses which of the two
    # low-bit tests applies; bit_2 is left unconstrained.
    # (Presumably 0x41-0x4F / 0x61-0x6F when bit_3 is clear and
    # 0x50-0x5A / 0x70-0x7A when it is set, i.e. the letters.)
    low_ok_for_letter = (b3 & ~low_is_b_to_f) | (~b3 & low_is_nonzero)
    letters = low_ok_for_letter & (b1 & ~b0)

    # Bytes with bits 0-1 clear and bits 2-3 set, minus those passing the
    # A-F test (presumably 0x30-0x39, i.e. the digits).
    digits = ((b2 & b3) & ~(b0 | b1)) & ~low_is_a_to_f

    word_chars = letters | digits
    dictionary.Words = word_chars

    # XOR of the stream with its advanced copy marks every position where
    # membership in Words changes.  Assumes pablo.Advance moves each set bit
    # one position forward -- TODO confirm.
    transitions = pablo.Advance(word_chars) ^ word_chars
    # A transition that lies inside Words is a word's first position ...
    dictionary.Word_starts = transitions & word_chars
    # ... one that lies outside is the position just past a word's last one.
    dictionary.Word_ends = transitions & ~word_chars
74
def Form_Length_Group_Bitstreams(dictionary):
    # Split dictionary.Word_ends into per-length groups.
    #
    # Reads:  dictionary.Word_starts, dictionary.Word_ends
    # Writes: dictionary.Word_ends_1 .. dictionary.Word_ends_16 and
    #         dictionary.Word_ends_17_and_longer
    # Returns nothing.
    #
    # Method: a copy of Word_starts is advanced once per step.  After k
    # steps, any still-unclaimed end marker it coincides with is assigned to
    # group k and removed from the pool, so each end marker lands in exactly
    # one group.  Assumes pablo.Advance moves every set bit one position
    # forward -- TODO confirm.
    #
    # The sixteen steps are kept as straight-line statements rather than a
    # loop; presumably this file is fed to a compiler that expects that
    # form (the file header says "Optimized for compilation") -- confirm
    # before refactoring.

    remaining_ends = dictionary.Word_ends
    cursor = dictionary.Word_starts

    # Group symbols of length 1
    cursor = pablo.Advance(cursor)
    dictionary.Word_ends_1 = cursor & remaining_ends
    remaining_ends = remaining_ends & ~dictionary.Word_ends_1

    # Group symbols of length 2
    cursor = pablo.Advance(cursor)
    dictionary.Word_ends_2 = cursor & remaining_ends
    remaining_ends = remaining_ends & ~dictionary.Word_ends_2

    # Group symbols of length 3
    cursor = pablo.Advance(cursor)
    dictionary.Word_ends_3 = cursor & remaining_ends
    remaining_ends = remaining_ends & ~dictionary.Word_ends_3

    # Group symbols of length 4
    cursor = pablo.Advance(cursor)
    dictionary.Word_ends_4 = cursor & remaining_ends
    remaining_ends = remaining_ends & ~dictionary.Word_ends_4

    # Group symbols of length 5
    cursor = pablo.Advance(cursor)
    dictionary.Word_ends_5 = cursor & remaining_ends
    remaining_ends = remaining_ends & ~dictionary.Word_ends_5

    # Group symbols of length 6
    cursor = pablo.Advance(cursor)
    dictionary.Word_ends_6 = cursor & remaining_ends
    remaining_ends = remaining_ends & ~dictionary.Word_ends_6

    # Group symbols of length 7
    cursor = pablo.Advance(cursor)
    dictionary.Word_ends_7 = cursor & remaining_ends
    remaining_ends = remaining_ends & ~dictionary.Word_ends_7

    # Group symbols of length 8
    cursor = pablo.Advance(cursor)
    dictionary.Word_ends_8 = cursor & remaining_ends
    remaining_ends = remaining_ends & ~dictionary.Word_ends_8

    # Group symbols of length 9
    cursor = pablo.Advance(cursor)
    dictionary.Word_ends_9 = cursor & remaining_ends
    remaining_ends = remaining_ends & ~dictionary.Word_ends_9

    # Group symbols of length 10
    cursor = pablo.Advance(cursor)
    dictionary.Word_ends_10 = cursor & remaining_ends
    remaining_ends = remaining_ends & ~dictionary.Word_ends_10

    # Group symbols of length 11
    cursor = pablo.Advance(cursor)
    dictionary.Word_ends_11 = cursor & remaining_ends
    remaining_ends = remaining_ends & ~dictionary.Word_ends_11

    # Group symbols of length 12
    cursor = pablo.Advance(cursor)
    dictionary.Word_ends_12 = cursor & remaining_ends
    remaining_ends = remaining_ends & ~dictionary.Word_ends_12

    # Group symbols of length 13
    cursor = pablo.Advance(cursor)
    dictionary.Word_ends_13 = cursor & remaining_ends
    remaining_ends = remaining_ends & ~dictionary.Word_ends_13

    # Group symbols of length 14
    cursor = pablo.Advance(cursor)
    dictionary.Word_ends_14 = cursor & remaining_ends
    remaining_ends = remaining_ends & ~dictionary.Word_ends_14

    # Group symbols of length 15
    cursor = pablo.Advance(cursor)
    dictionary.Word_ends_15 = cursor & remaining_ends
    remaining_ends = remaining_ends & ~dictionary.Word_ends_15

    # Group symbols of length 16
    cursor = pablo.Advance(cursor)
    dictionary.Word_ends_16 = cursor & remaining_ends
    remaining_ends = remaining_ends & ~dictionary.Word_ends_16

    # Group symbols of length 17 and longer: every end marker that none of
    # the sixteen steps above claimed.
    dictionary.Word_ends_17_and_longer = remaining_ends
163
def Compute_Hash_Value_Bitstream(hash_data, basis_bits):
    # Store in hash_data.Hash_value the XOR of basis bits 2, 4 and 6.
    # Returns nothing.
    #
    # The original source also carried a commented-out alternative that
    # XOR-ed bits 3, 5 and 7 instead.
    folded = basis_bits.bit_2 ^ basis_bits.bit_4
    folded = folded ^ basis_bits.bit_6
    hash_data.Hash_value = folded
167
def Main(basis_bits, dictionary, hash_data):
    # Entry point: run the three passes of this file in order.
    #   basis_bits -- read by the first and third pass
    #   dictionary -- filled in by the first and second pass
    #   hash_data  -- filled in by the third pass
    # Returns nothing.

    # Pass 1 must come first: it writes the Word_starts / Word_ends fields
    # that the length-grouping pass reads.
    Classify_bytes(basis_bits, dictionary)

    # Passes 2 and 3 are needed to do Parallel Bitstream Based Length
    # Sorting: length groups first, then the hash value.
    Form_Length_Group_Bitstreams(dictionary)
    Compute_Hash_Value_Bitstream(hash_data, basis_bits)
174               
Note: See TracBrowser for help on using the repository browser.