source: trunk/symtab/bitstream_hash_table_impl.h @ 1772

Last change on this file since 1772 was 1772, checked in by vla24, 8 years ago

Updated symbol table expansion decision.

File size: 14.8 KB
Line 
1/*
2 * Created on: 26-August-2011
3 * Author: Vera Lukman
4 *
5 * This hash table is almost an exact copy of HashSymbolTable except that
6 * BitStreamDivHashTable accepts a hashvalue to insert or lookup for a key.
7 * This hash table is designed specifically for Parallel Bitstream-Based Length Sorting Symbol Table.
8 * It is the base class of BitStreamHashTableImpl, BitstreamLogHashTable and BitstreamDivHashTable.
9 *
10 * Each grouping strategy use different comparison strategy
11 * Identity group sorting        f(L) = L
12 * Log group sorting             f(L) = ceil (log(L))
13 * Division group sorting        f(L) = floor( (L-1)/2^k )
14 *
15 */
16
17//#ifndef BITSTREAM_HASH_TABLE_IMPL_H
18//#define BITSTREAM_HASH_TABLE_IMPL_H
19
20//#define XOR_HASHVALUE //Define this macro to xor bitstream hashvalue with the computed hashvalue for custom hash function
21
22#include <stdlib.h>
23#include <stdio.h>
24#include "pbgs_utilities.h"
25#include "ls_symbol_table_util.h"
26
27
28// Use this method if L > 16
29template <int L>
30    unsigned int BitStreamHashTableImpl<L>::Lookup_Name_17(char * name, const int hashvalue, int length)
31{
32    unsigned int bucket = getBucket(hashvalue, m_tableSize, length);
33#if DEBUG_BHT
34    printf ("[%s] Bucket: %i\n", __FUNCTION__, bucket);
35#endif
36
37    // Look up the value in the chain
38    for(CHAIN* chain = m_table[bucket]; chain != NULL; chain = chain->next) {
39#if DEBUG_BHT
40        printf ("Check symbol: %s\n", chain->key);
41#endif
42        if( simd_compare ((unsigned char*)chain->key, (unsigned char*)name, length) )
43        {
44#if DEBUG_BHT
45            printf ("Found GID: %i\n", chain->GID);
46#endif
47            return chain->GID;
48        }
49    }
50
51#if DEBUG_BHT
52    printf ("Not Found GID: %i\n", 0);
53#endif
54    return 0;
55}
56
57template <int L>
58    void BitStreamHashTableImpl<L>::Insert_Name(const char * name, const int hashvalue, const unsigned int gid)
59{
60    unsigned int bucket = getBucket(name, hashvalue, m_tableSize);
61
62    // If it was not found, add it
63    CHAIN* next = m_table[bucket];
64    unsigned int chainlgth = chainLength(next);
65//    if
66    if (chainlgth == MAX_CHAIN && m_totalSymbols > m_maxSymbols)
67    {
68        //current chain length is maximum, expand the table
69        expandHashTable();
70        bucket = getBucket(name, hashvalue, m_tableSize);
71        next = m_table[bucket];
72    }
73    CHAIN* curr = (CHAIN*) malloc(sizeof(CHAIN));
74    curr->next = next;
75    curr->key = name;
76    curr->hashvalue = hashvalue;
77    curr->GID = gid;
78    m_table[bucket] = curr;
79
80    m_totalSymbols++;
81
82#if DEBUG_BHT
83    printf ("Given GID: %i | Stored GID: %i\n", gid, m_table[bucket]->GID);
84#endif
85}
86
87template <int L>
88    void BitStreamHashTableImpl<L>::Insert_Name(const char * name, const int hashvalue, const unsigned int gid, const unsigned int lgth)
89{
90    unsigned int bucket = getBucket(hashvalue, m_tableSize, lgth);
91#if DEBUG_BHT
92    printf ("[%s_lgth] Bucket: %i\n", __FUNCTION__, bucket, lgth);
93#endif
94
95    // If it was not found, add it
96    CHAIN* next = m_table[bucket];
97    unsigned int chainlgth = chainLength(next);
98//    if (chainlgth == MAX_CHAIN)
99    if (chainlgth == MAX_CHAIN && m_totalSymbols > m_maxSymbols)
100    {
101        //current chain length is maximum, expand the table
102        expandHashTable();
103        bucket = getBucket(hashvalue, m_tableSize, lgth);
104        next = m_table[bucket];
105    }
106    CHAIN* curr = (CHAIN*) malloc(sizeof(CHAIN));
107    curr->next = next;
108    curr->key = name;
109    curr->hashvalue = hashvalue;
110    curr->GID = gid;
111    curr->lgth = lgth;
112    m_table[bucket] = curr;
113
114    m_totalSymbols++;
115
116#if DEBUG_BHT
117    printf ("Given GID: %i | Stored GID: %i\n", gid, m_table[bucket]->GID);
118#endif
119}
120
121template <int L>
122    void BitStreamHashTableImpl<L>::expandHashTable()
123{
124    unsigned int tempTableSize = nextTableSize();
125    CHAIN** tempTable = allocateHashTable(tempTableSize);
126    if (!tempTable)
127    {
128        printf ("Failed to allocate new table, cannot add new symbols.\n");
129    }
130
131
132    for(unsigned int bucket = 0; bucket < m_tableSize; bucket++)
133    {
134        CHAIN* curr = m_table[bucket];
135        m_table[bucket] = NULL;
136
137        while (curr != NULL)
138        {
139            CHAIN* next = curr->next;
140
141            //insert to new table
142            unsigned int tempIndex = getBucket(curr->key, curr->hashvalue, tempTableSize);
143            CHAIN* tempNext = tempTable[tempIndex];
144            curr->next = tempNext;
145            tempTable[tempIndex] = curr;
146
147            //update curr
148            curr = next;
149        }
150    }
151
152    // Deallocate m_table
153    deallocateHashTable(m_tableSize, m_table);
154
155    // Reassign m_table
156    m_table = tempTable;
157    m_tableSize = tempTableSize;
158
159    m_maxSymbols = m_tableSize >> 1 + m_tableSize >> 2;
160}
161
162template <>
163    void BitStreamHashTableImpl<17>::expandHashTable()
164{
165    unsigned int tempTableSize = nextTableSize();
166    CHAIN** tempTable = allocateHashTable(tempTableSize);
167    if (!tempTable)
168    {
169        printf ("Failed to allocate new table, cannot add new symbols.\n");
170    }
171
172    for(unsigned int bucket = 0; bucket < m_tableSize; bucket++)
173    {
174        CHAIN* curr = m_table[bucket];
175        m_table[bucket] = NULL;
176
177        while (curr != NULL)
178        {
179            CHAIN* next = curr->next;
180
181            //insert to new table
182            unsigned int tempIndex = getBucket(curr->hashvalue, tempTableSize, curr->lgth);
183            CHAIN* tempNext = tempTable[tempIndex];
184            curr->next = tempNext;
185            tempTable[tempIndex] = curr;
186
187            //update curr
188            curr = next;
189        }
190    }
191
192    // Deallocate m_table
193    deallocateHashTable(m_tableSize, m_table);
194
195    // Reassign m_table
196    m_table = tempTable;
197    m_tableSize = tempTableSize;
198
199    m_maxSymbols = m_tableSize >> 1 + m_tableSize >> 2;
200}
201
202template <int L>
203    inline unsigned int BitStreamHashTableImpl<L>::getBucket(const int hashvalue, unsigned int tableSize, unsigned int length)
204{
205#if DEBUG_BHT
206    printf ("%s_%i (should be 17) | hashvalue: %i | ", __FUNCTION__, L, hashvalue);
207#endif
208//    return hashvalue & TABLE_SIZE_MASK(tableSize);
209    return longHashFunction<uint32_t>((uint32_t*)(&hashvalue), (uint32_t*)(&hashvalue), length, BIT_SIZE(tableSize)) & TABLE_SIZE_MASK(tableSize);
210}
211
212
213// Is it better if hash function takes integer type as template parameter?
214// getBucket<uint8_t>
215// uint8_t -- L = 1
216// uint16_t -- L = 2, 9-16 (bitstream)
217// uint32_t -- L = 3,4, >16 (bitstream)
218// uint64_t -- L = 5,6,7,8
219// In hash function itself, don't pass in hashBitSize, use sizeOf(T)*8
220//
221// Problem: we might be missing some important bits. For example, consider the case if hashBitSize is actually 9,
222// but we are using 16 instead. So we are missing 7 important bits... Probably it does not really matter...
223// What about 32 bits?
224// This design will work gracefully with Log grouping.. Maybe we should use this...
225//
226// Unfortunately, we cannot use integer type as template parameter for getBucket.
227// Why? Because we need to use either bitstream or bytestream base on the length.
228// So we do need to write special implementation for specific length..
229//
230// For LOG GROUPING
231// We could do better!! Separate hash functions in this class into 2 implementations:
232// 1. computeHashValue<class T>(T* hashvalue, unsigned int tableSize)
233// 2. getBucket<int L>(T* hashvalue, unsigned int tableSize, const char* name) --> calls computeHashValue<class T>(...)
234// This way, we can do length specialization in getBucket --> getBucket decides which hashvalue to pass to computeHashValue
235
236// Use this for L in [9,16]
237template <int L>
238    inline unsigned int BitStreamHashTableImpl<L>::getBucket(const char* name, const int hashvalue, unsigned int tableSize)
239{
240#if DEBUG_BHT
241    printf ("%s_%i | hashvalue: %i | ", __FUNCTION__, L, hashvalue);
242#endif
243    int tempHashVal = hashvalue << L;
244    return (longHashFunction<uint32_t>((uint32_t*)(&hashvalue), (uint32_t*)(&tempHashVal), L<<1, BIT_SIZE(tableSize)) ^ ~hashvalue ) & TABLE_SIZE_MASK(tableSize);
245}
246
247template <>
248    inline unsigned int BitStreamHashTableImpl<1>::getBucket(const char* name, const int hashvalue, unsigned int tableSize)
249{
250#if DEBUG_BHT
251    printf ("%s_%i | hashvalue: %i | ", __FUNCTION__, 1, hashvalue);
252#endif
253#ifdef XOR_HASHVALUE
254    return ( longHashFunction<uint8_t>((uint8_t*)name, (uint8_t*)name, BYTES_TO_BITS(1), BIT_SIZE(tableSize)) ^ ~hashvalue ) & TABLE_SIZE_MASK(tableSize);
255#else
256    return longHashFunction<uint8_t>((uint8_t*)name, (uint8_t*)name, BYTES_TO_BITS(1), BIT_SIZE(tableSize)) & TABLE_SIZE_MASK(tableSize);
257#endif
258}
259
260template <>
261    inline unsigned int BitStreamHashTableImpl<2>::getBucket(const char* name, const int hashvalue, unsigned int tableSize)
262{
263#if DEBUG_BHT
264    printf ("%s_%i | hashvalue: %i | ", __FUNCTION__, 2, hashvalue);
265#endif
266#ifdef XOR_HASHVALUE
267    return ( longHashFunction<uint16_t>((uint16_t*)name, (uint16_t*)name, BYTES_TO_BITS(2), BIT_SIZE(tableSize)) ^ ~hashvalue ) & TABLE_SIZE_MASK(tableSize);
268#else
269    return longHashFunction<uint16_t>((uint16_t*)name, (uint16_t*)name, BYTES_TO_BITS(2), BIT_SIZE(tableSize)) & TABLE_SIZE_MASK(tableSize);
270#endif
271}
272
273template <>
274    inline unsigned int BitStreamHashTableImpl<3>::getBucket(const char* name, const int hashvalue, unsigned int tableSize)
275{
276#if DEBUG_BHT
277    printf ("%s_%i | hashvalue: %i | ", __FUNCTION__, 3, hashvalue);
278#endif
279#ifdef XOR_HASHVALUE
280    return ( longHashFunction<uint32_t>((uint32_t*)name, (uint32_t*)name, BYTES_TO_BITS(3), BIT_SIZE(tableSize)) ^ ~hashvalue ) & TABLE_SIZE_MASK(tableSize);
281#else
282    return longHashFunction<uint32_t>((uint32_t*)name, (uint32_t*)name, BYTES_TO_BITS(3), BIT_SIZE(tableSize)) & TABLE_SIZE_MASK(tableSize);
283#endif
284}
285
286template <>
287    inline unsigned int BitStreamHashTableImpl<4>::getBucket(const char* name, const int hashvalue, unsigned int tableSize)
288{
289#if DEBUG_BHT
290    printf ("%s_%i | hashvalue: %i | ", __FUNCTION__, 4, hashvalue);
291#endif
292#ifdef XOR_HASHVALUE
293//    return ( longHashFunction<uint16_t>((uint16_t*)name, (uint16_t*)name+2, 16, BIT_SIZE(tableSize)) ^ ~hashvalue ) & TABLE_SIZE_MASK(tableSize);
294    uint16_t temphashvalue = ( (*((uint16_t*)name)) ^ (*((uint16_t*)(name+2))) );
295    return ( longHashFunction<uint16_t>(&temphashvalue, &temphashvalue, 16, BIT_SIZE(tableSize)) ^ ~hashvalue ) & TABLE_SIZE_MASK(tableSize);
296#else
297    return longHashFunction<uint32_t>((uint32_t*)name, (uint32_t*)name, BYTES_TO_BITS(4), BIT_SIZE(tableSize)) & TABLE_SIZE_MASK(tableSize);
298#endif
299}
300
301template <>
302    inline unsigned int BitStreamHashTableImpl<5>::getBucket(const char* name, const int hashvalue, unsigned int tableSize)
303{
304#if DEBUG_BHT
305    printf ("%s_%i | hashvalue: %i | ", __FUNCTION__, 5, hashvalue);
306#endif
307#ifdef XOR_HASHVALUE
308//    return ( longHashFunction<uint32_t>((uint32_t*)name, (uint32_t*)(name+2), BYTES_TO_BITS(3), BIT_SIZE(tableSize)) ^ ~hashvalue ) & TABLE_SIZE_MASK(tableSize);
309    uint16_t temphashvalue = ( (*((uint16_t*)name)) ^ (*((uint16_t*)(name+2))) ^ (* ((uint16_t*)(name+3)) ) );
310    return ( longHashFunction<uint16_t>(&temphashvalue, &temphashvalue, 16, BIT_SIZE(tableSize)) ^ ~hashvalue ) & TABLE_SIZE_MASK(tableSize);
311#else
312    return longHashFunction<uint32_t>((uint32_t*)name, (uint32_t*)(name+2), BYTES_TO_BITS(3), BIT_SIZE(tableSize)) & TABLE_SIZE_MASK(tableSize);
313#endif
314}
315
316template <>
317    inline unsigned int BitStreamHashTableImpl<6>::getBucket(const char* name, const int hashvalue, unsigned int tableSize)
318{
319#if DEBUG_BHT
320    printf ("%s_%i | hashvalue: %i | ", __FUNCTION__, 6, hashvalue);
321#endif
322#ifdef XOR_HASHVALUE
323//    return ( longHashFunction<uint32_t>((uint32_t*)name, (uint32_t*)(name+3), BYTES_TO_BITS(3), BIT_SIZE(tableSize)) ^ ~hashvalue ) & TABLE_SIZE_MASK(tableSize);
324    uint16_t temphashvalue = ( (*((uint16_t*)name)) ^ (*((uint16_t*)(name+2))) ^ (*((uint16_t*)(name+4))) );
325    return ( longHashFunction<uint16_t>(&temphashvalue, &temphashvalue, 16, BIT_SIZE(tableSize)) ^ ~hashvalue ) & TABLE_SIZE_MASK(tableSize);
326#else
327    return longHashFunction<uint32_t>((uint32_t*)name, (uint32_t*)(name+3), BYTES_TO_BITS(3), BIT_SIZE(tableSize)) & TABLE_SIZE_MASK(tableSize);
328#endif
329}
330
331template <>
332    inline unsigned int BitStreamHashTableImpl<7>::getBucket(const char* name, const int hashvalue, unsigned int tableSize)
333{
334#if DEBUG_BHT
335    printf ("%s_%i | hashvalue: %i | ", __FUNCTION__, 7, hashvalue);
336#endif
337#ifdef XOR_HASHVALUE
338//    return ( longHashFunction<uint32_t>((uint32_t*)name, (uint32_t*)(name+3), BYTES_TO_BITS(4), BIT_SIZE(tableSize)) ^ ~hashvalue ) & TABLE_SIZE_MASK(tableSize);
339    uint32_t temphashvalue = (*((uint32_t*)name)) ^ (*((uint32_t*)(name+3)));
340    return ( longHashFunction<uint32_t>(&temphashvalue, &temphashvalue, 32, BIT_SIZE(tableSize)) ^ ~hashvalue ) & TABLE_SIZE_MASK(tableSize);
341#else
342    return longHashFunction<uint32_t>((uint32_t*)name, (uint32_t*)(name+3), BYTES_TO_BITS(4), BIT_SIZE(tableSize)) & TABLE_SIZE_MASK(tableSize);
343#endif
344}
345
346template <>
347    inline unsigned int BitStreamHashTableImpl<8>::getBucket(const char* name, const int hashvalue, unsigned int tableSize)
348{
349#if DEBUG_BHT
350    printf ("%s_%i | hashvalue: %i | ", __FUNCTION__, 8, hashvalue);
351#endif
352#ifdef XOR_HASHVALUE
353//    return ( longHashFunction<uint32_t>((uint32_t*)name, (uint32_t*)(name+4), BYTES_TO_BITS(4), BIT_SIZE(tableSize)) ^ ~hashvalue ) & TABLE_SIZE_MASK(tableSize);
354    uint32_t temphashvalue = (*((uint32_t*)name)) ^ (*((uint32_t*)(name+4)));
355    return ( longHashFunction<uint32_t>(&temphashvalue, &temphashvalue, 32, BIT_SIZE(tableSize)) ^ ~hashvalue ) & TABLE_SIZE_MASK(tableSize);
356#else
357    return longHashFunction<uint32_t>((uint32_t*)name, (uint32_t*)(name+4), BYTES_TO_BITS(4), BIT_SIZE(tableSize)) & TABLE_SIZE_MASK(tableSize);
358#endif
359}
360
361template <>
362    inline unsigned int BitStreamHashTableImpl<9>::getBucket(const char* name, const int hashvalue, unsigned int tableSize)
363{
364#if DEBUG_BHT
365    printf ("%s_%i | hashvalue: %i | ", __FUNCTION__, 9, hashvalue);
366#endif
367#ifdef XOR_HASHVALUE
368//    return ( longHashFunction<uint64_t>((uint64_t*)name, (uint64_t*)(name+4), BYTES_TO_BITS(5), BIT_SIZE(tableSize)) ^ ~hashvalue ) & TABLE_SIZE_MASK(tableSize);
369    uint32_t temphashvalue = (*((uint32_t*)name)) ^ (*((uint32_t*)(name+4))) ^ (*((uint32_t*)(name+5)));
370    return ( longHashFunction<uint32_t>(&temphashvalue, &temphashvalue, 32, BIT_SIZE(tableSize)) ^ ~hashvalue ) & TABLE_SIZE_MASK(tableSize);
371#else
372    return longHashFunction<uint64_t>((uint64_t*)name, (uint64_t*)(name+4), BYTES_TO_BITS(5), BIT_SIZE(tableSize)) & TABLE_SIZE_MASK(tableSize);
373#endif
374}
375
376template <>
377    inline unsigned int BitStreamHashTableImpl<10>::getBucket(const char* name, const int hashvalue, unsigned int tableSize)
378{
379#if DEBUG_BHT
380    printf ("%s_%i | hashvalue: %i | ", __FUNCTION__, 10, hashvalue);
381#endif
382#ifdef XOR_HASHVALUE
383//    return ( longHashFunction<uint64_t>((uint64_t*)name, (uint64_t*)(name+5), BYTES_TO_BITS(5), BIT_SIZE(tableSize)) ^ ~hashvalue ) & TABLE_SIZE_MASK(tableSize);
384    uint32_t temphashvalue = (*((uint32_t*)name)) ^ (*((uint32_t*)(name+4))) ^ (*((uint32_t*)(name+6)));
385    return ( longHashFunction<uint32_t>(&temphashvalue, &temphashvalue, 32, BIT_SIZE(tableSize)) ^ ~hashvalue ) & TABLE_SIZE_MASK(tableSize);
386#else
387    return longHashFunction<uint64_t>((uint64_t*)name, (uint64_t*)(name+5), BYTES_TO_BITS(5), BIT_SIZE(tableSize)) & TABLE_SIZE_MASK(tableSize);
388#endif
389}
390
391//#endif // BITSTREAM_HASH_TABLE_IMPL_H
Note: See TracBrowser for help on using the repository browser.