source: proto/SymbolTable/dictionary_pbgs_log.py @ 1684

Last change on this file since 1684 was 1666, checked in by vla24, 8 years ago

SymbolTable: Added natural language dictionary templates that use the symbol table

File size: 3.8 KB
Line 
1# -*- coding: utf-8 -*-
2#
3# parabix2_compilable.py
4#
5# Parallel XML Parsing with Bitstream Addition
6#
7# - Complete prototype for all bitstream computations in Parabix2
8# - Optimized for compilation
9# - Separate compilation
10
11# Robert D. Cameron
12# July 29, 2010
13#
14
15#import bitutil
16
class Basis_bits():
    # Container for the eight input fields bit_0 .. bit_7, each defaulting
    # to 0.  Classify_bytes and Compute_Hash_Value_Bitstream read these
    # fields and combine them with bitwise operators (&, |, ^, ~).
    #
    # NOTE(review): the masks built in Classify_bytes correspond to ASCII
    # ranges when bit_0 is taken as the most significant bit of each byte
    # and bit_7 as the least significant -- confirm against the producer
    # of these fields.
    bit_0 = 0
    bit_1 = 0
    bit_2 = 0
    bit_3 = 0
    bit_4 = 0
    bit_5 = 0
    bit_6 = 0
    bit_7 = 0
26
class Dictionary ():
    # Container for the output fields written by Classify_bytes and
    # Form_Length_Group_Bitstreams.  Every field defaults to 0.

    # Written by Classify_bytes: the word-character mask.
    Words = 0
    # Written by Classify_bytes: positions inside Words where Words
    # differs from pablo.Advance(Words).
    Word_starts = 0
    # Written by Classify_bytes: positions outside Words where Words
    # differs from pablo.Advance(Words).
    Word_ends = 0

    # Written by Form_Length_Group_Bitstreams: Word_ends split into
    # mutually exclusive buckets.  Each bucket is computed from
    # remaining_ends, which has every earlier bucket masked out.  The names
    # give the word-length range each bucket is intended to hold.
    Word_ends_1 = 0
    Word_ends_2 = 0
    Word_ends_3_to_4 = 0
    Word_ends_5_to_8 = 0
    Word_ends_9_to_16 = 0
    # Whatever is left of Word_ends after the buckets above are removed.
    Word_ends_17_and_longer = 0
37
class Hash_data():
    # Container for the single output of Compute_Hash_Value_Bitstream.
    # Hash_value defaults to 0 and is overwritten with the XOR of three
    # Basis_bits fields.
    Hash_value = 0
40
def Classify_bytes(basis_bits, dictionary):
    # Build the word-character mask and its run-boundary masks.
    #
    # Reads basis_bits.bit_0 .. bit_7; writes dictionary.Words,
    # dictionary.Word_starts and dictionary.Word_ends.  Returns nothing.
    #
    # NOTE(review): the range comments below assume bit_0 is the most
    # significant bit of each byte and bit_7 the least significant; under
    # that reading the masks are exact ASCII ranges -- confirm.
    # NOTE(review): `pablo` is neither imported nor defined in the visible
    # part of this file; presumably it is supplied by the toolchain that
    # processes this file -- confirm.

    # temp1..temp8: the low five bits (bit_3..bit_7) hold a value in 1..26.
    #   temp3 -> low nibble is 11..15; temp4 is its complement.
    #   temp7 -> low nibble is non-zero.
    #   temp8 -> (bit_3 set and low nibble 0..10) or (bit_3 clear and low
    #            nibble 1..15).
    temp1 = (basis_bits.bit_6 & basis_bits.bit_7)
    temp2 = (basis_bits.bit_5 | temp1)
    temp3 = (basis_bits.bit_4 & temp2)
    temp4 = (~temp3)
    temp5 = (basis_bits.bit_4 | basis_bits.bit_5)
    temp6 = (basis_bits.bit_6 | basis_bits.bit_7)
    temp7 = (temp5 | temp6)
    temp8 = ((basis_bits.bit_3 & temp4)|(~(basis_bits.bit_3) & temp7))

    # temp9: bit_0 clear and bit_1 set (bytes 0x40-0x7F).
    temp9 = (basis_bits.bit_1 &~ basis_bits.bit_0)
    # temp10: 0x41-0x5A and 0x61-0x7A, i.e. ASCII letters; bit_2 is left
    # unconstrained, which covers both cases.
    temp10 = (temp8 & temp9)

    # temp13: bit_0 and bit_1 clear, bit_2 and bit_3 set (bytes 0x30-0x3F).
    temp11 = (basis_bits.bit_0 | basis_bits.bit_1)
    temp12 = (basis_bits.bit_2 & basis_bits.bit_3)
    temp13 = (temp12 &~ temp11)
    # temp15: low nibble is 10..15.
    temp14 = (basis_bits.bit_5 | basis_bits.bit_6)
    temp15 = (basis_bits.bit_4 & temp14)
    # temp16: 0x30-0x39, i.e. ASCII digits.
    temp16 = (temp13 &~ temp15)

    # A word character is a letter or a digit.
    dictionary.Words = (temp10 | temp16)

    # temp marks every position where Words differs from its advanced copy.
    # Assuming pablo.Advance moves each bit to the next position (TODO
    # confirm), those positions are the first character of each run of word
    # characters (kept by "& Words") and the first position after each run
    # (kept by "& ~Words").
    temp = pablo.Advance(dictionary.Words) ^ dictionary.Words
    dictionary.Word_starts = temp & dictionary.Words
    dictionary.Word_ends = temp & ~dictionary.Words
63
def Form_Length_Group_Bitstreams(dictionary):
    # Split dictionary.Word_ends into length buckets.
    #
    # Reads dictionary.Word_starts and dictionary.Word_ends; writes
    # dictionary.Word_ends_1, Word_ends_2, Word_ends_3_to_4,
    # Word_ends_5_to_8, Word_ends_9_to_16 and Word_ends_17_and_longer.
    # Returns nothing.
    #
    # Each bucket is taken from remaining_ends, which has every earlier
    # bucket masked out, so the buckets never overlap and the statement
    # order below matters.
    #
    # NOTE(review): `pablo` and `interpose32` are neither imported nor
    # defined in the visible part of this file; presumably the toolchain
    # that processes this file supplies them -- confirm.
    # NOTE(review): the offsets quoted below assume pablo.Advance moves
    # each bit forward by one position -- confirm.

    starts = dictionary.Word_starts
    remaining_ends = dictionary.Word_ends

    # Group symbols of length 1: the start mark advanced once.
    starts_1 = pablo.Advance(starts)
    dictionary.Word_ends_1 = starts_1 & remaining_ends
    remaining_ends = remaining_ends & ~dictionary.Word_ends_1

    # Group symbols of length 2: the start mark advanced twice.
    starts_2 = pablo.Advance(starts_1)
    dictionary.Word_ends_2 = starts_2 & remaining_ends
    remaining_ends = remaining_ends & ~dictionary.Word_ends_2

    # Group symbols of length 3_to_4
    # temp holds offsets 1 and 2 from each start.  The earlier formulation,
    # kept commented out below, advanced temp twice (giving offsets 3 and
    # 4); interpose32(temp, temp32, 2) presumably does the same shift by 2
    # with temp32 as the carried-in data -- confirm.
    temp = starts_1 | starts_2
    temp32 = pablo.Advance32(temp)
    #starts_3_to_4 = pablo.Advance(pablo.Advance(temp))
    starts_3_to_4 = interpose32 (temp, temp32, 2)
    dictionary.Word_ends_3_to_4 = starts_3_to_4 & remaining_ends
    remaining_ends = remaining_ends & ~dictionary.Word_ends_3_to_4

    # Group symbols of length 5_to_8
    # temp now holds offsets 1..4; the earlier formulation advanced it four
    # times (giving offsets 5..8).
    temp = temp | starts_3_to_4
    temp32 = pablo.Advance32(temp)
    #starts_5_to_8 = pablo.Advance(pablo.Advance(pablo.Advance(pablo.Advance(temp))))
    starts_5_to_8 = interpose32 (temp, temp32, 4)
    dictionary.Word_ends_5_to_8 = starts_5_to_8 & remaining_ends
    remaining_ends = remaining_ends & ~dictionary.Word_ends_5_to_8

    # Group symbols of length 9_to_16
    # temp now holds offsets 1..8; the earlier formulation advanced it
    # eight times (giving offsets 9..16).
    temp = temp | starts_5_to_8
    temp32 = pablo.Advance32(temp)
    #starts_9_to_16 = pablo.Advance(pablo.Advance(pablo.Advance(pablo.Advance(pablo.Advance(pablo.Advance(pablo.Advance(pablo.Advance(temp))))))))
    starts_9_to_16 = interpose32 (temp, temp32, 8)
    dictionary.Word_ends_9_to_16 = starts_9_to_16 & remaining_ends
    remaining_ends = remaining_ends & ~dictionary.Word_ends_9_to_16

    # Group symbols of length 17 and longer: every end mark not claimed by
    # a shorter bucket.
    dictionary.Word_ends_17_and_longer = remaining_ends
105
def Compute_Hash_Value_Bitstream(hash_data, basis_bits):
    # Store the XOR of bit_2, bit_4 and bit_6 in hash_data.Hash_value.
    #
    # Reads basis_bits.bit_2, bit_4 and bit_6; overwrites
    # hash_data.Hash_value.  Returns nothing.
    hash_data.Hash_value = basis_bits.bit_2 ^ basis_bits.bit_4 ^ basis_bits.bit_6
    # Alternative left by the original author, using the odd-numbered
    # fields instead:
    #hash_data.Hash_value = basis_bits.bit_3 ^ basis_bits.bit_5 ^ basis_bits.bit_7
109
def Main(basis_bits, dictionary, hash_data):
    # Run the three passes in order.
    #
    # Classify_bytes must run first: it writes dictionary.Word_starts and
    # dictionary.Word_ends, which Form_Length_Group_Bitstreams reads.
    # Compute_Hash_Value_Bitstream reads only basis_bits and writes only
    # hash_data.  Returns nothing; all results are stored on the
    # dictionary and hash_data arguments.
    Classify_bytes(basis_bits, dictionary)

    # These methods are needed to do Parallel Bitstream Based Length Sorting
    Form_Length_Group_Bitstreams(dictionary)
    Compute_Hash_Value_Bitstream(hash_data, basis_bits)
116               
Note: See TracBrowser for help on using the repository browser.