source: proto/SymbolTable/dictionary_pbgs_id_adv.py @ 1684

Last change on this file since 1684 was 1666, checked in by vla24, 8 years ago

SymbolTable: Added natural-language dictionary templates that use the symbol table

File size: 5.3 KB
Line 
1# -*- coding: utf-8 -*-
2#
3# parabix2_compilable.py
4#
5# Parallel XML Parsing with Bitstream Addition
6#
7# - Complete prototype for all bitstream computations in Parabix2
8# - Optimized for compilation
9# - Separate compilation
10
11# Robert D. Cameron
12# July 29, 2010
13#
14
15#import bitutil
16
# Holder for the eight basis bit streams bit_0 .. bit_7, each defaulting to 0.
# Classify_bytes reads all eight; Compute_Hash_Value_Bitstream reads bit_2,
# bit_4 and bit_6.
# NOTE(review): presumably stream bit_k carries bit k of every input byte,
# with bit_0 the most significant bit -- confirm against the code that
# fills these streams.
class Basis_bits():
    bit_0 = 0
    bit_1 = 0
    bit_2 = 0
    bit_3 = 0
    bit_4 = 0
    bit_5 = 0
    bit_6 = 0
    bit_7 = 0
26
# Holder for the word-related bit streams, each defaulting to 0.
#   Words, Word_starts, Word_ends   - written by Classify_bytes
#   Word_ends_1 .. Word_ends_16     - written by Form_Length_Group_Bitstreams,
#                                     one stream per length group
#   Word_ends_17_and_longer         - written by Form_Length_Group_Bitstreams:
#                                     the word ends left over after the
#                                     sixteen groups above were removed
class Dictionary ():
    Words = 0
    Word_starts = 0
    Word_ends = 0
    # Per-length-group end streams.
    Word_ends_1 = 0
    Word_ends_2 = 0
    Word_ends_3 = 0
    Word_ends_4 = 0
    Word_ends_5 = 0
    Word_ends_6 = 0
    Word_ends_7 = 0
    Word_ends_8 = 0
    Word_ends_9 = 0
    Word_ends_10 = 0
    Word_ends_11 = 0
    Word_ends_12 = 0
    Word_ends_13 = 0
    Word_ends_14 = 0
    Word_ends_15 = 0
    Word_ends_16 = 0
    # Catch-all group for every end not matched by groups 1..16.
    Word_ends_17_and_longer = 0
48
# Holder for the hash bit stream; Hash_value defaults to 0 and is written by
# Compute_Hash_Value_Bitstream.
class Hash_data():
    Hash_value = 0
51
def Classify_bytes(basis_bits, dictionary):
    # Fill in dictionary.Words, dictionary.Word_starts and
    # dictionary.Word_ends from the eight basis bit streams.
    #
    # The byte ranges quoted in the comments below assume that bit_0 is the
    # most significant bit of each byte and bit_7 the least -- TODO confirm.
    # Under that reading, Words marks exactly the ASCII characters
    # [A-Za-z0-9].

    # Letters: 0x41-0x5A and 0x61-0x7A.
    low_two_set = basis_bits.bit_6 & basis_bits.bit_7
    bit5_or_low_two = basis_bits.bit_5 | low_two_set
    lo_nibble_b_to_f = basis_bits.bit_4 & bit5_or_low_two
    lo_nibble_0_to_a = ~lo_nibble_b_to_f
    lo_upper_pair_any = basis_bits.bit_4 | basis_bits.bit_5
    lo_lower_pair_any = basis_bits.bit_6 | basis_bits.bit_7
    lo_nibble_nonzero = lo_upper_pair_any | lo_lower_pair_any
    # bit_3 set   (0x5_ / 0x7_): low nibble must be 0..A  -> P-Z, p-z
    # bit_3 clear (0x4_ / 0x6_): low nibble must be 1..F  -> A-O, a-o
    letter_lo_nibble = (basis_bits.bit_3 & lo_nibble_0_to_a) | (~basis_bits.bit_3 & lo_nibble_nonzero)
    hi_bits_01 = basis_bits.bit_1 & ~basis_bits.bit_0
    letters = letter_lo_nibble & hi_bits_01

    # Digits: 0x30-0x39.
    top_two_any = basis_bits.bit_0 | basis_bits.bit_1
    bits_2_and_3 = basis_bits.bit_2 & basis_bits.bit_3
    hi_nibble_3 = bits_2_and_3 & ~top_two_any
    bit5_or_bit6 = basis_bits.bit_5 | basis_bits.bit_6
    lo_nibble_a_to_f = basis_bits.bit_4 & bit5_or_bit6
    digits = hi_nibble_3 & ~lo_nibble_a_to_f

    dictionary.Words = letters | digits

    # Positions where Words differs from its advanced copy.
    # NOTE(review): presumably pablo.Advance moves every mark forward by one
    # position; if so, edges inside Words are the first character of each
    # word and edges outside Words are the position just past each word.
    edges = pablo.Advance(dictionary.Words) ^ dictionary.Words
    dictionary.Word_starts = edges & dictionary.Words
    dictionary.Word_ends = edges & ~dictionary.Words
74
def Form_Length_Group_Bitstreams(dictionary):
    # Split dictionary.Word_ends into one stream per length group.
    #
    # Reads:  dictionary.Word_starts, dictionary.Word_ends
    # Writes: dictionary.Word_ends_1 .. dictionary.Word_ends_16 and
    #         dictionary.Word_ends_17_and_longer
    #
    # Each group k is the candidate stream for k ANDed with the word ends that
    # no earlier group has claimed; the matches are then removed from the
    # pool.  Because every group only draws from what the shorter groups left
    # behind, the statements below must stay in increasing-length order.
    #
    # NOTE(review): interpose32(starts, starts_adv32, k) presumably yields
    # Word_starts advanced by k positions, built from the stream and its
    # 32-position advance -- confirm against interpose32's definition.

    unclaimed_ends = dictionary.Word_ends
    starts = dictionary.Word_starts
    starts_adv32 = pablo.Advance32(starts)

    # Group symbols of length 1
    dictionary.Word_ends_1 = interpose32(starts, starts_adv32, 1) & unclaimed_ends
    unclaimed_ends = unclaimed_ends & ~dictionary.Word_ends_1

    # Group symbols of length 2
    dictionary.Word_ends_2 = interpose32(starts, starts_adv32, 2) & unclaimed_ends
    unclaimed_ends = unclaimed_ends & ~dictionary.Word_ends_2

    # Group symbols of length 3
    dictionary.Word_ends_3 = interpose32(starts, starts_adv32, 3) & unclaimed_ends
    unclaimed_ends = unclaimed_ends & ~dictionary.Word_ends_3

    # Group symbols of length 4
    dictionary.Word_ends_4 = interpose32(starts, starts_adv32, 4) & unclaimed_ends
    unclaimed_ends = unclaimed_ends & ~dictionary.Word_ends_4

    # Group symbols of length 5
    dictionary.Word_ends_5 = interpose32(starts, starts_adv32, 5) & unclaimed_ends
    unclaimed_ends = unclaimed_ends & ~dictionary.Word_ends_5

    # Group symbols of length 6
    dictionary.Word_ends_6 = interpose32(starts, starts_adv32, 6) & unclaimed_ends
    unclaimed_ends = unclaimed_ends & ~dictionary.Word_ends_6

    # Group symbols of length 7
    dictionary.Word_ends_7 = interpose32(starts, starts_adv32, 7) & unclaimed_ends
    unclaimed_ends = unclaimed_ends & ~dictionary.Word_ends_7

    # Group symbols of length 8
    dictionary.Word_ends_8 = interpose32(starts, starts_adv32, 8) & unclaimed_ends
    unclaimed_ends = unclaimed_ends & ~dictionary.Word_ends_8

    # Group symbols of length 9
    dictionary.Word_ends_9 = interpose32(starts, starts_adv32, 9) & unclaimed_ends
    unclaimed_ends = unclaimed_ends & ~dictionary.Word_ends_9

    # Group symbols of length 10
    dictionary.Word_ends_10 = interpose32(starts, starts_adv32, 10) & unclaimed_ends
    unclaimed_ends = unclaimed_ends & ~dictionary.Word_ends_10

    # Group symbols of length 11
    dictionary.Word_ends_11 = interpose32(starts, starts_adv32, 11) & unclaimed_ends
    unclaimed_ends = unclaimed_ends & ~dictionary.Word_ends_11

    # Group symbols of length 12
    dictionary.Word_ends_12 = interpose32(starts, starts_adv32, 12) & unclaimed_ends
    unclaimed_ends = unclaimed_ends & ~dictionary.Word_ends_12

    # Group symbols of length 13
    dictionary.Word_ends_13 = interpose32(starts, starts_adv32, 13) & unclaimed_ends
    unclaimed_ends = unclaimed_ends & ~dictionary.Word_ends_13

    # Group symbols of length 14
    dictionary.Word_ends_14 = interpose32(starts, starts_adv32, 14) & unclaimed_ends
    unclaimed_ends = unclaimed_ends & ~dictionary.Word_ends_14

    # Group symbols of length 15 (the candidate stream is kept for reuse below)
    candidates_15 = interpose32(starts, starts_adv32, 15)
    dictionary.Word_ends_15 = candidates_15 & unclaimed_ends
    unclaimed_ends = unclaimed_ends & ~dictionary.Word_ends_15

    # Group symbols of length 16: derived by advancing the length-15
    # candidates once more instead of calling interpose32 with 16.
    candidates_16 = pablo.Advance(candidates_15)
    dictionary.Word_ends_16 = candidates_16 & unclaimed_ends
    unclaimed_ends = unclaimed_ends & ~dictionary.Word_ends_16

    # Group symbols of length 17 and longer: everything still unclaimed
    dictionary.Word_ends_17_and_longer = unclaimed_ends
150
def Compute_Hash_Value_Bitstream(hash_data, basis_bits):
    # Store in hash_data.Hash_value the XOR of basis bit streams 2, 4 and 6.
    # Disabled alternative from the original: bit_3 ^ bit_5 ^ bit_7.
    folded = basis_bits.bit_2 ^ basis_bits.bit_4
    folded = folded ^ basis_bits.bit_6
    hash_data.Hash_value = folded
154
def Main(basis_bits, dictionary, hash_data):
    # Entry point: runs the three stages in order on the caller-supplied
    # basis_bits, dictionary and hash_data objects.

    # Stage 1: mark word characters and word boundaries in dictionary.
    Classify_bytes(basis_bits, dictionary)

    # Stages 2 and 3 are needed for Parallel Bitstream Based Length Sorting:
    # the per-length word-end streams and the hash value stream.
    Form_Length_Group_Bitstreams(dictionary)
    Compute_Hash_Value_Bitstream(hash_data, basis_bits)
161               
Note: See TracBrowser for help on using the repository browser.