source: trunk/symtab/pbgs_div_symbol_table.h @ 1653

Last change on this file since 1653 was 1653, checked in by vla24, 8 years ago

SymbolTable?: implemented auto-resize and custom hashing for different symbol length for division and identity length grouping

File size: 5.3 KB
Line 
1/*
2 * Created on: 8-August-2011
3 * Author: Vera Lukman
4 *
5 * This symbol table is designed specifically for Parallel Bitstream-Based Length Sorting
6 * Division by 2 grouping strategy.
7 *
8 */
9
10#ifndef PBGS_DIV_SYMBOL_TABLE_H
11#define PBGS_DIV_SYMBOL_TABLE_H
12
13#define DEBUG_PBGS_DIV 0
14
15// Define either one of grouping functions
16//#define USE_LENGTH_SORT       // f(L) = L
17//#define USE_LOG_SORT          // f(L) = ceil (log(L))
18#define USE_DIV_SORT            // f(L) = floor ((L+1)/2)
19
20#define TOTAL_GROUPS 10
21#define LAST_GROUP TOTAL_GROUPS-1
22
23#define USE_FUNCTION_TEMPLATES
24#include "stringpool.h"
25#include "bitstream_super_hash_table.h"
26#include "bitstream_div_hash_table.h"
27#include "ls_symbol_table_compare.h"
28#include "ls_symbol_table_util.h"
29
30#if DEBUG_PBGS_DIV
31#include <iostream>
32using namespace std;
33#endif
34                           //0     1     2    3     4    5    6    7
35static char delimiters[] = {' ', '\0', '\0', ';', '\0', '=', '>', '/'};
36
37class PBGSDivSymbolTable
38{
39public:
40    PBGSDivSymbolTable();
41    ~PBGSDivSymbolTable();
42
43    template <int L> inline int Lookup_or_Insert_Name(char* name, int hashvalue);
44
45    inline int Lookup_or_Insert_Name_32(char* name, int hashvalue, int L);
46
47    inline int Lookup_or_Insert_Name(char* name, int hashvalue, int L);
48
49    void Print_Symbol_Table_Distribution();
50
51private:
52    StringPool <4096,100> m_pool;
53    BitStreamSuperHashTable* m_hashTable[TOTAL_GROUPS];
54    int m_globalNameCount;
55
56    inline int getGroup(int L);
57    inline bool isDelimiter(const char c);
58
59    template <int L> inline char* Store_Name(const char* name);
60};
61
62PBGSDivSymbolTable::PBGSDivSymbolTable()
63    :m_globalNameCount(1)
64{
65    m_hashTable[1] = new BitStreamDivHashTable<2>();
66    m_hashTable[2] = new BitStreamDivHashTable<4>();
67    m_hashTable[3] = new BitStreamDivHashTable<6>();
68    m_hashTable[4] = new BitStreamDivHashTable<8>();
69    m_hashTable[5] = new BitStreamDivHashTable<10>();
70    m_hashTable[6] = new BitStreamDivHashTable<12>();
71    m_hashTable[7] = new BitStreamDivHashTable<14>();
72    m_hashTable[8] = new BitStreamDivHashTable<16>();
73    m_hashTable[9] = new BitStreamDivHashTable<17>();
74};
75
76PBGSDivSymbolTable::~PBGSDivSymbolTable()
77{
78    for (int i = 1; i < TOTAL_GROUPS; i ++)
79    {
80        delete m_hashTable[i];
81        m_hashTable[i] = NULL;
82    }
83}
84
85template <> inline char* PBGSDivSymbolTable::Store_Name<3>(const char* name)
86{
87    return m_pool.Insert(name, 3, 1);
88}
89
90template <> inline char* PBGSDivSymbolTable::Store_Name<5>(const char* name)
91{
92    return m_pool.Insert(name, 5, 3);
93}
94
95template <> inline char* PBGSDivSymbolTable::Store_Name<6>(const char* name)
96{
97    return m_pool.Insert(name, 6, 2);
98}
99
100template <> inline char* PBGSDivSymbolTable::Store_Name<7>(const char* name)
101{
102    return m_pool.Insert(name, 7, 1);
103}
104
105template <int L> inline char* PBGSDivSymbolTable::Store_Name(const char* name)
106{
107    return m_pool.Insert(name, L);
108}
109
110// L should be an even number, L is in [1,16]
111template <int L>
112        inline int PBGSDivSymbolTable::Lookup_or_Insert_Name(char* name, int hashvalue)
113{
114    int GID = 0;
115    int group = getGroup(L);
116
117    //Lookup
118    GID = ((BitStreamDivHashTable<L>*)m_hashTable[group])->Lookup_Name(name, hashvalue);
119
120    if (!GID) //symbol not found
121    {
122        // Store the symbol
123        char* c = Store_Name<L>(name);
124
125        //Check if the last character is a delimiter
126        if (isDelimiter(name[L-1]))
127        {
128            //Do another lookup for name with L = L-1
129            GID = ((BitStreamDivHashTable<L>*)m_hashTable[group])->Lookup_Name_Odd(name, hashvalue);
130
131            if (!GID)
132            {
133                //Symbol is not found in odd table
134                GID = m_globalNameCount;
135                m_globalNameCount++;
136
137                // Store symbol for odd length. We need to do this because the symbol-key comparison
138                // in the hash table does not mask out characters.
139                char* oddSymbol = Store_Name<L-1>(name);
140
141                // Insert into odd table
142                ((BitStreamDivHashTable<L>*)m_hashTable[group])->Insert_Name_Odd_Symbol(oddSymbol, hashvalue, GID);
143            }
144        }
145        else
146        {
147            //even length symbol
148            GID = m_globalNameCount;
149            m_globalNameCount++;
150        }
151
152        //Insert into even table
153        ((BitStreamDivHashTable<L>*)m_hashTable[group])->Insert_Name(c, hashvalue, GID);
154    }
155#if DEBUG_PBGS_DIV
156    char delim = name[L];
157    name[L] = '\0';
158    cout << "Lookup or Insert: " << name << " | lgth: " << L << " | group: " << group << endl;
159    name[L] = delim;
160#endif
161
162    return GID;
163}
164
165// Use this function for L > 16
166inline int PBGSDivSymbolTable::Lookup_or_Insert_Name(char* name, int hashvalue, int lgth)
167{
168    int GID = 0;
169
170#if DEBUG_PBGS_DIV
171    cout << "Lookup or Insert: " << name << endl;
172#endif
173
174    //Lookup
175    GID = ((BitStreamDivHashTable<17>*)m_hashTable[LAST_GROUP])->Lookup_Name_17(name, hashvalue, lgth);
176
177    //Insert
178    if (!GID) //symbol not found
179    {
180        char* c = m_pool.Insert(name, lgth);
181        GID = m_globalNameCount;
182
183        ((BitStreamDivHashTable<17>*)m_hashTable[LAST_GROUP])->Insert_Name(c, hashvalue, m_globalNameCount, lgth);
184        m_globalNameCount++;
185    }
186    return GID;
187}
188
189
190inline bool PBGSDivSymbolTable::isDelimiter(const char c)
191{
192    return c == delimiters[(unsigned int) c & 0x7];
193}
194
195inline int PBGSDivSymbolTable::getGroup(int L)
196{
197    if (L > 16)
198        return LAST_GROUP;
199    else
200        return (L+1)/2;
201}
202
203void PBGSDivSymbolTable::Print_Symbol_Table_Distribution()
204{
205    for (int i = 1; i < TOTAL_GROUPS; i ++)
206    {
207        fprintf (stderr, "Group #%i\n", i );
208        m_hashTable[i]->Print_Symbol_Table_Distribution();
209    }
210
211}
212#endif // PBGS_DIV_SYMBOL_TABLE_H
213
214
Note: See TracBrowser for help on using the repository browser.