source: trunk/symtab/pbgs_div_symbol_table.h @ 2546

Last change on this file since 2546 was 1742, checked in by vla24, 7 years ago

SymbolTable?: Fixed custom hashing function for parallel bitstream based grouping

File size: 5.1 KB
Line 
1/*
2 * Created on: 8-August-2011
3 * Author: Vera Lukman
4 *
5 * This symbol table is designed specifically for Parallel Bitstream-Based Length Sorting
6 * Division by 2 grouping strategy.
7 *
8 */
9
10#ifndef PBGS_DIV_SYMBOL_TABLE_H
11#define PBGS_DIV_SYMBOL_TABLE_H
12//#define XML_PARSER    // Defines this macro if this symbol table is going to be used as
13                        // a part of an XML parser.
14#define DEBUG_PBGS_DIV 0
15
16#define TOTAL_GROUPS 10
17#define LAST_GROUP TOTAL_GROUPS-1
18
19#define USE_FUNCTION_TEMPLATES
20#include "stringpool.h"
21#include "bitstream_super_hash_table.h"
22#include "bitstream_div_hash_table.h"
23#include "ls_symbol_table_compare.h"
24#include "ls_symbol_table_util.h"
25#include "pbgs_utilities.h"
26
27#if DEBUG_PBGS_DIV
28#include <iostream>
29using namespace std;
30#endif
31
32class PBGSDivSymbolTable
33{
34public:
35    PBGSDivSymbolTable();
36    ~PBGSDivSymbolTable();
37
38    template <int L> inline int Lookup_or_Insert_Name(char* name, int hashvalue);
39
40    inline int Lookup_or_Insert_Name_32(char* name, int hashvalue, int L);
41
42    inline int Lookup_or_Insert_Name(char* name, int hashvalue, int L);
43
44    void Print_Symbol_Table_Distribution();
45
46private:
47    StringPool <4096,100> m_pool;
48    BitStreamSuperHashTable* m_hashTable[TOTAL_GROUPS];
49    int m_globalNameCount;
50
51    inline int getGroup(int L);
52
53    template <int L> inline char* Store_Name(const char* name);
54};
55
56PBGSDivSymbolTable::PBGSDivSymbolTable()
57    :m_globalNameCount(1)
58{
59    m_hashTable[1] = new BitStreamDivHashTable<2>();
60    m_hashTable[2] = new BitStreamDivHashTable<4>();
61    m_hashTable[3] = new BitStreamDivHashTable<6>();
62    m_hashTable[4] = new BitStreamDivHashTable<8>();
63    m_hashTable[5] = new BitStreamDivHashTable<10>();
64    m_hashTable[6] = new BitStreamDivHashTable<12>();
65    m_hashTable[7] = new BitStreamDivHashTable<14>();
66    m_hashTable[8] = new BitStreamDivHashTable<16>();
67    m_hashTable[9] = new BitStreamDivHashTable<17>();
68};
69
70PBGSDivSymbolTable::~PBGSDivSymbolTable()
71{
72    for (int i = 1; i < TOTAL_GROUPS; i ++)
73    {
74        delete m_hashTable[i];
75        m_hashTable[i] = NULL;
76    }
77}
78
79template <> inline char* PBGSDivSymbolTable::Store_Name<3>(const char* name)
80{
81    return m_pool.Insert(name, 3, 1);
82}
83
84template <> inline char* PBGSDivSymbolTable::Store_Name<5>(const char* name)
85{
86    return m_pool.Insert(name, 5, 3);
87}
88
89template <> inline char* PBGSDivSymbolTable::Store_Name<6>(const char* name)
90{
91    return m_pool.Insert(name, 6, 2);
92}
93
94template <> inline char* PBGSDivSymbolTable::Store_Name<7>(const char* name)
95{
96    return m_pool.Insert(name, 7, 1);
97}
98
99template <int L> inline char* PBGSDivSymbolTable::Store_Name(const char* name)
100{
101    return m_pool.Insert(name, L);
102}
103
104// L should be an even number, L is in [1,16]
105template <int L>
106        inline int PBGSDivSymbolTable::Lookup_or_Insert_Name(char* name, int hashvalue)
107{
108    int GID = 0;
109    int group = getGroup(L);
110
111    //Lookup
112    GID = ((BitStreamDivHashTable<L>*)m_hashTable[group])->Lookup_Name(name, hashvalue);
113
114    if (!GID) //symbol not found
115    {
116        // Store the symbol
117        char* c = Store_Name<L>(name);
118
119        //Check if the last character is a delimiter
120#ifdef XMLWF
121        if (isXMLDelimiter(name[L-1]))
122#else
123        if (isGeneralDelimiter(name[L-1]))
124#endif
125        {
126            //Do another lookup for name with L = L-1
127            GID = ((BitStreamDivHashTable<L>*)m_hashTable[group])->Lookup_Name_Odd(name, hashvalue);
128
129            if (!GID)
130            {
131                //Symbol is not found in odd table
132                GID = m_globalNameCount;
133                m_globalNameCount++;
134
135                // Store symbol for odd length. We need to do this because the symbol-key comparison
136                // in the hash table does not mask out characters.
137                char* oddSymbol = Store_Name<L-1>(name);
138
139                // Insert into odd table
140                ((BitStreamDivHashTable<L>*)m_hashTable[group])->Insert_Name_Odd_Symbol(oddSymbol, hashvalue, GID);
141            }
142        }
143        else
144        {
145            //even length symbol
146            GID = m_globalNameCount;
147            m_globalNameCount++;
148        }
149
150        //Insert into even table
151        ((BitStreamDivHashTable<L>*)m_hashTable[group])->Insert_Name(c, hashvalue, GID);
152    }
153#if DEBUG_PBGS_DIV
154    char delim = name[L];
155    name[L] = '\0';
156    cout << "Lookup or Insert: " << name << " | lgth: " << L << " | group: " << group << endl;
157    name[L] = delim;
158#endif
159
160    return GID;
161}
162
163// Use this function for L > 16
164inline int PBGSDivSymbolTable::Lookup_or_Insert_Name(char* name, int hashvalue, int lgth)
165{
166    int GID = 0;
167
168#if DEBUG_PBGS_DIV
169    cout << "Lookup or Insert: " << name << endl;
170#endif
171
172    //Lookup
173    GID = ((BitStreamDivHashTable<17>*)m_hashTable[LAST_GROUP])->Lookup_Name_17(name, hashvalue, lgth);
174
175    //Insert
176    if (!GID) //symbol not found
177    {
178        char* c = m_pool.Insert(name, lgth);
179        GID = m_globalNameCount;
180
181        ((BitStreamDivHashTable<17>*)m_hashTable[LAST_GROUP])->Insert_Name(c, hashvalue, m_globalNameCount, lgth);
182        m_globalNameCount++;
183    }
184    return GID;
185}
186
187inline int PBGSDivSymbolTable::getGroup(int L)
188{
189    if (L > 16)
190        return LAST_GROUP;
191    else
192        return (L+1)/2;
193}
194
195void PBGSDivSymbolTable::Print_Symbol_Table_Distribution()
196{
197    for (int i = 1; i < TOTAL_GROUPS; i ++)
198    {
199        printf ("Group #%i\n", i );
200        m_hashTable[i]->Print_Symbol_Table_Distribution();
201    }
202
203}
204#endif // PBGS_DIV_SYMBOL_TABLE_H
205
206
Note: See TracBrowser for help on using the repository browser.