source: trunk/symtab/bitstream_hash_table_impl.h @ 3587

Last change on this file since 3587 was 1789, checked in by vla24, 8 years ago

updated hash function

File size: 16.3 KB
Line 
1/*
2 * Created on: 26-August-2011
3 * Author: Vera Lukman
4 *
5 * This hash table is almost an exact copy of HashSymbolTable except that
6 * BitStreamDivHashTable accepts a hashvalue to insert or lookup for a key.
7 * This hash table is designed specifically for Parallel Bitstream-Based Length Sorting Symbol Table.
8 * It is the base class of BitStreamHashTableImpl, BitstreamLogHashTable and BitstreamDivHashTable.
9 *
10 * Each grouping strategy use different comparison strategy
11 * Identity group sorting        f(L) = L
12 * Log group sorting             f(L) = ceil (log(L))
13 * Division group sorting        f(L) = floor( (L-1)/2^k )
14 *
15 */
16
17//#ifndef BITSTREAM_HASH_TABLE_IMPL_H
18//#define BITSTREAM_HASH_TABLE_IMPL_H
19
20//#define XOR_HASHVALUE //Define this macro to xor bitstream hashvalue with the computed hashvalue for custom hash function
21
22#include <stdlib.h>
23#include <stdio.h>
24#include "pbgs_utilities.h"
25#include "ls_symbol_table_util.h"
26
27
28// Use this method if L > 16
29template <int L>
30    unsigned int BitStreamHashTableImpl<L>::Lookup_Name_17(char * name, const int hashvalue, int length)
31{
32    unsigned int bucket = getBucket(hashvalue, m_tableSize, length);
33#if DEBUG_BHT
34    printf ("[%s] Bucket: %i\n", __FUNCTION__, bucket);
35#endif
36
37    // Look up the value in the chain
38    for(CHAIN* chain = m_table[bucket]; chain != NULL; chain = chain->next) {
39#if DEBUG_BHT
40        printf ("Check symbol: %s\n", chain->key);
41#endif
42        if( simd_compare ((unsigned char*)chain->key, (unsigned char*)name, length) )
43        {
44#if DEBUG_BHT
45            printf ("Found GID: %i\n", chain->GID);
46#endif
47            return chain->GID;
48        }
49    }
50
51#if DEBUG_BHT
52    printf ("Not Found GID: %i\n", 0);
53#endif
54    return 0;
55}
56
57template <int L>
58    void BitStreamHashTableImpl<L>::Insert_Name(const char * name, const int hashvalue, const unsigned int gid)
59{
60    unsigned int bucket = getBucket(name, hashvalue, m_tableSize);
61
62    // If it was not found, add it
63    CHAIN* next = m_table[bucket];
64    unsigned int chainlgth = chainLength(next);
65    if (chainlgth == MAX_CHAIN)
66    {
67        //current chain length is maximum, expand the table
68        expandHashTable();
69        bucket = getBucket(name, hashvalue, m_tableSize);
70        next = m_table[bucket];
71    }
72    CHAIN* curr = (CHAIN*) malloc(sizeof(CHAIN));
73    curr->next = next;
74    curr->key = name;
75    curr->hashvalue = hashvalue;
76    curr->GID = gid;
77    m_table[bucket] = curr;
78
79#if DEBUG_BHT
80    printf ("Given GID: %i | Stored GID: %i\n", gid, m_table[bucket]->GID);
81#endif
82}
83
84template <int L>
85    void BitStreamHashTableImpl<L>::Insert_Name(const char * name, const int hashvalue, const unsigned int gid, const unsigned int lgth)
86{
87    unsigned int bucket = getBucket(hashvalue, m_tableSize, lgth);
88#if DEBUG_BHT
89    printf ("[%s_lgth] Bucket: %i\n", __FUNCTION__, bucket, lgth);
90#endif
91
92    // If it was not found, add it
93    CHAIN* next = m_table[bucket];
94    unsigned int chainlgth = chainLength(next);
95    if (chainlgth == MAX_CHAIN)
96    {
97        //current chain length is maximum, expand the table
98        expandHashTable();
99        bucket = getBucket(hashvalue, m_tableSize, lgth);
100        next = m_table[bucket];
101    }
102    CHAIN* curr = (CHAIN*) malloc(sizeof(CHAIN));
103    curr->next = next;
104    curr->key = name;
105    curr->hashvalue = hashvalue;
106    curr->GID = gid;
107    curr->lgth = lgth;
108    m_table[bucket] = curr;
109
110#if DEBUG_BHT
111    printf ("Given GID: %i | Stored GID: %i\n", gid, m_table[bucket]->GID);
112#endif
113}
114
115template <int L>
116    void BitStreamHashTableImpl<L>::expandHashTable()
117{
118    unsigned int tempTableSize = nextTableSize();
119    CHAIN** tempTable = allocateHashTable(tempTableSize);
120    if (!tempTable)
121    {
122        printf ("Failed to allocate new table, cannot add new symbols.\n");
123    }
124
125
126    for(unsigned int bucket = 0; bucket < m_tableSize; bucket++)
127    {
128        CHAIN* curr = m_table[bucket];
129        m_table[bucket] = NULL;
130
131        while (curr != NULL)
132        {
133            CHAIN* next = curr->next;
134
135            //insert to new table
136            unsigned int tempIndex = getBucket(curr->key, curr->hashvalue, tempTableSize);
137            CHAIN* tempNext = tempTable[tempIndex];
138            curr->next = tempNext;
139            tempTable[tempIndex] = curr;
140
141            //update curr
142            curr = next;
143        }
144    }
145
146    // Deallocate m_table
147    deallocateHashTable(m_tableSize, m_table);
148
149    // Reassign m_table
150    m_table = tempTable;
151    m_tableSize = tempTableSize;
152}
153
154template <>
155    void BitStreamHashTableImpl<17>::expandHashTable()
156{
157    unsigned int tempTableSize = nextTableSize();
158    CHAIN** tempTable = allocateHashTable(tempTableSize);
159    if (!tempTable)
160    {
161        printf ("Failed to allocate new table, cannot add new symbols.\n");
162    }
163
164    for(unsigned int bucket = 0; bucket < m_tableSize; bucket++)
165    {
166        CHAIN* curr = m_table[bucket];
167        m_table[bucket] = NULL;
168
169        while (curr != NULL)
170        {
171            CHAIN* next = curr->next;
172
173            //insert to new table
174            unsigned int tempIndex = getBucket(curr->hashvalue, tempTableSize, curr->lgth);
175            CHAIN* tempNext = tempTable[tempIndex];
176            curr->next = tempNext;
177            tempTable[tempIndex] = curr;
178
179            //update curr
180            curr = next;
181        }
182    }
183
184    // Deallocate m_table
185    deallocateHashTable(m_tableSize, m_table);
186
187    // Reassign m_table
188    m_table = tempTable;
189    m_tableSize = tempTableSize;
190}
191
192template <int L>
193    inline unsigned int BitStreamHashTableImpl<L>::getBucket(const int hashvalue, unsigned int tableSize, unsigned int length)
194{
195#if DEBUG_BHT
196    printf ("%s_%i (should be 17) | hashvalue: %i | ", __FUNCTION__, L, hashvalue);
197#endif
198//    return hashvalue & TABLE_SIZE_MASK(tableSize);
199    return longHashFunction<uint32_t>((uint32_t*)(&hashvalue), (uint32_t*)(&hashvalue), length, BIT_SIZE(tableSize)) & TABLE_SIZE_MASK(tableSize);
200}
201
202
203// Is it better if hash function takes integer type as template parameter?
204// getBucket<uint8_t>
205// uint8_t -- L = 1
206// uint16_t -- L = 2, 9-16 (bitstream)
207// uint32_t -- L = 3,4, >16 (bitstream)
208// uint64_t -- L = 5,6,7,8
209// In hash function itself, don't pass in hashBitSize, use sizeOf(T)*8
210//
211// Problem: we might be missing some important bits. For example, consider the case if hashBitSize is actually 9,
212// but we are using 16 instead. So we are missing 7 important bits... Probably it does not really matter...
213// What about 32 bits?
214// This design will work gracefully with Log grouping.. Maybe we should use this...
215//
216// Unfortunately, we cannot use integer type as template parameter for getBucket.
217// Why? Because we need to use either bitstream or bytestream base on the length.
218// So we do need to write special implementation for specific length..
219//
220// For LOG GROUPING
221// We could do better!! Separate hash functions in this class into 2 implementations:
222// 1. computeHashValue<class T>(T* hashvalue, unsigned int tableSize)
223// 2. getBucket<int L>(T* hashvalue, unsigned int tableSize, const char* name) --> calls computeHashValue<class T>(...)
224// This way, we can do length specialization in getBucket --> getBucket decides which hashvalue to pass to computeHashValue
225
226// Use this for L in [9,16]
227template <int L>
228    inline unsigned int BitStreamHashTableImpl<L>::getBucket(const char* name, const int hashvalue, unsigned int tableSize)
229{
230#if DEBUG_BHT
231    printf ("%s_%i | hashvalue: %i | ", __FUNCTION__, L, hashvalue);
232#endif
233    int tempHashVal = hashvalue << L;
234    return (longHashFunction<uint32_t>((uint32_t*)(&hashvalue), (uint32_t*)(&tempHashVal), L<<1, BIT_SIZE(tableSize)) ^ ~hashvalue ) & TABLE_SIZE_MASK(tableSize);
235}
236
237template <>
238    inline unsigned int BitStreamHashTableImpl<1>::getBucket(const char* name, const int hashvalue, unsigned int tableSize)
239{
240#if DEBUG_BHT
241    printf ("%s_%i | hashvalue: %i | ", __FUNCTION__, 1, hashvalue);
242#endif
243#ifdef XOR_HASHVALUE
244    return ( longHashFunction<uint8_t>((uint8_t*)name, (uint8_t*)name, BYTES_TO_BITS(1), BIT_SIZE(tableSize)) ^ ~hashvalue ) & TABLE_SIZE_MASK(tableSize);
245#else
246    return longHashFunction<uint8_t>((uint8_t*)name, (uint8_t*)name, BYTES_TO_BITS(1), BIT_SIZE(tableSize)) & TABLE_SIZE_MASK(tableSize);
247#endif
248}
249
250template <>
251    inline unsigned int BitStreamHashTableImpl<2>::getBucket(const char* name, const int hashvalue, unsigned int tableSize)
252{
253#if DEBUG_BHT
254    printf ("%s_%i | hashvalue: %i | ", __FUNCTION__, 2, hashvalue);
255#endif
256#ifdef XOR_HASHVALUE
257    return ( longHashFunction<uint16_t>((uint16_t*)name, (uint16_t*)name, BYTES_TO_BITS(2), BIT_SIZE(tableSize)) ^ ~hashvalue ) & TABLE_SIZE_MASK(tableSize);
258#else
259    return longHashFunction<uint16_t>((uint16_t*)name, (uint16_t*)name, BYTES_TO_BITS(2), BIT_SIZE(tableSize)) & TABLE_SIZE_MASK(tableSize);
260#endif
261}
262
263template <>
264    inline unsigned int BitStreamHashTableImpl<3>::getBucket(const char* name, const int hashvalue, unsigned int tableSize)
265{
266#if DEBUG_BHT
267    printf ("%s_%i | hashvalue: %i | ", __FUNCTION__, 3, hashvalue);
268#endif
269#ifdef XOR_HASHVALUE
270    return ( longHashFunction<uint32_t>((uint32_t*)name, (uint32_t*)name, BYTES_TO_BITS(3), BIT_SIZE(tableSize)) ^ ~hashvalue ) & TABLE_SIZE_MASK(tableSize);
271#else
272    return longHashFunction<uint32_t>((uint32_t*)name, (uint32_t*)name, BYTES_TO_BITS(3), BIT_SIZE(tableSize)) & TABLE_SIZE_MASK(tableSize);
273#endif
274}
275
276template <>
277    inline unsigned int BitStreamHashTableImpl<4>::getBucket(const char* name, const int hashvalue, unsigned int tableSize)
278{
279#if DEBUG_BHT
280    printf ("%s_%i | hashvalue: %i | ", __FUNCTION__, 4, hashvalue);
281#endif
282#ifdef XOR_HASHVALUE
283    return (longHashFunction<uint32_t>((uint32_t*)name, (uint32_t*)name, BYTES_TO_BITS(4), BIT_SIZE(tableSize)) ^ ~hashvalue ) & TABLE_SIZE_MASK(tableSize);
284//    return ( longHashFunction<uint16_t>((uint16_t*)name, (uint16_t*)name+2, 16, BIT_SIZE(tableSize)) ^ ~hashvalue ) & TABLE_SIZE_MASK(tableSize);
285//    uint16_t temphashvalue = ( (*((uint16_t*)name)) ^ (*((uint16_t*)(name+2))) );
286//    return ( longHashFunction<uint16_t>(&temphashvalue, &temphashvalue, 16, BIT_SIZE(tableSize)) ^ ~hashvalue ) & TABLE_SIZE_MASK(tableSize);
287#else
288    return longHashFunction<uint32_t>((uint32_t*)name, (uint32_t*)name, BYTES_TO_BITS(4), BIT_SIZE(tableSize)) & TABLE_SIZE_MASK(tableSize);
289#endif
290}
291
292template <>
293    inline unsigned int BitStreamHashTableImpl<5>::getBucket(const char* name, const int hashvalue, unsigned int tableSize)
294{
295#if DEBUG_BHT
296    printf ("%s_%i | hashvalue: %i | ", __FUNCTION__, 5, hashvalue);
297#endif
298#ifdef XOR_HASHVALUE
299//    return ( longHashFunction<uint32_t>((uint32_t*)name, (uint32_t*)(name+2), BYTES_TO_BITS(3), BIT_SIZE(tableSize)) ^ ~hashvalue ) & TABLE_SIZE_MASK(tableSize);
300    uint32_t temphashvalue = ( (*((uint32_t*)name)) ^ (*((uint32_t*)(name+1))) );
301    return ( longHashFunction<uint32_t>(&temphashvalue, &temphashvalue, 32, BIT_SIZE(tableSize)) ^ ~hashvalue ) & TABLE_SIZE_MASK(tableSize);
302#else
303    return longHashFunction<uint32_t>((uint32_t*)name, (uint32_t*)(name+2), BYTES_TO_BITS(3), BIT_SIZE(tableSize)) & TABLE_SIZE_MASK(tableSize);
304#endif
305}
306
307template <>
308    inline unsigned int BitStreamHashTableImpl<6>::getBucket(const char* name, const int hashvalue, unsigned int tableSize)
309{
310#if DEBUG_BHT
311    printf ("%s_%i | hashvalue: %i | ", __FUNCTION__, 6, hashvalue);
312#endif
313#ifdef XOR_HASHVALUE
314//    return ( longHashFunction<uint32_t>((uint32_t*)name, (uint32_t*)(name+3), BYTES_TO_BITS(3), BIT_SIZE(tableSize)) ^ ~hashvalue ) & TABLE_SIZE_MASK(tableSize);
315
316    uint32_t temphashvalue = ( (*((uint32_t*)name)) ^ (*((uint32_t*)(name+2))));
317    return ( longHashFunction<uint32_t>(&temphashvalue, &temphashvalue, 32, BIT_SIZE(tableSize)) ^ ~hashvalue ) & TABLE_SIZE_MASK(tableSize);
318#else
319    return longHashFunction<uint32_t>((uint32_t*)name, (uint32_t*)(name+3), BYTES_TO_BITS(3), BIT_SIZE(tableSize)) & TABLE_SIZE_MASK(tableSize);
320#endif
321}
322
323template <>
324    inline unsigned int BitStreamHashTableImpl<7>::getBucket(const char* name, const int hashvalue, unsigned int tableSize)
325{
326#if DEBUG_BHT
327    printf ("%s_%i | hashvalue: %i | ", __FUNCTION__, 7, hashvalue);
328#endif
329#ifdef XOR_HASHVALUE
330//    return ( longHashFunction<uint32_t>((uint32_t*)name, (uint32_t*)(name+3), BYTES_TO_BITS(4), BIT_SIZE(tableSize)) ^ ~hashvalue ) & TABLE_SIZE_MASK(tableSize);
331    uint32_t temphashvalue = (*((uint32_t*)name)) ^ (*((uint32_t*)(name+3)));
332    return ( longHashFunction<uint32_t>(&temphashvalue, &temphashvalue, 32, BIT_SIZE(tableSize)) ^ ~hashvalue ) & TABLE_SIZE_MASK(tableSize);
333#else
334    return longHashFunction<uint32_t>((uint32_t*)name, (uint32_t*)(name+3), BYTES_TO_BITS(4), BIT_SIZE(tableSize)) & TABLE_SIZE_MASK(tableSize);
335#endif
336}
337
338template <>
339    inline unsigned int BitStreamHashTableImpl<8>::getBucket(const char* name, const int hashvalue, unsigned int tableSize)
340{
341#if DEBUG_BHT
342    printf ("%s_%i | hashvalue: %i | ", __FUNCTION__, 8, hashvalue);
343#endif
344#ifdef XOR_HASHVALUE
345//    return ( longHashFunction<uint32_t>((uint32_t*)name, (uint32_t*)(name+4), BYTES_TO_BITS(4), BIT_SIZE(tableSize)) ^ ~hashvalue ) & TABLE_SIZE_MASK(tableSize);
346    uint32_t temphashvalue = (*((uint32_t*)name)) ^ (*((uint32_t*)(name+4)));
347    return ( longHashFunction<uint32_t>(&temphashvalue, &temphashvalue, 32, BIT_SIZE(tableSize)) ^ ~hashvalue ) & TABLE_SIZE_MASK(tableSize);
348#else
349    return longHashFunction<uint32_t>((uint32_t*)name, (uint32_t*)(name+4), BYTES_TO_BITS(4), BIT_SIZE(tableSize)) & TABLE_SIZE_MASK(tableSize);
350#endif
351}
352
353template <>
354    inline unsigned int BitStreamHashTableImpl<9>::getBucket(const char* name, const int hashvalue, unsigned int tableSize)
355{
356#if DEBUG_BHT
357    printf ("%s_%i | hashvalue: %i | ", __FUNCTION__, 9, hashvalue);
358#endif
359#ifdef XOR_HASHVALUE
360//    return ( longHashFunction<uint64_t>((uint64_t*)name, (uint64_t*)(name+4), BYTES_TO_BITS(5), BIT_SIZE(tableSize)) ^ ~hashvalue ) & TABLE_SIZE_MASK(tableSize);
361    uint32_t temphashvalue = (*((uint32_t*)name)) ^ (*((uint32_t*)(name+4))) ^ (*((uint32_t*)(name+5)));
362    return ( longHashFunction<uint32_t>(&temphashvalue, &temphashvalue, 32, BIT_SIZE(tableSize)) ^ ~hashvalue ) & TABLE_SIZE_MASK(tableSize);
363#else
364    return longHashFunction<uint64_t>((uint64_t*)name, (uint64_t*)(name+4), BYTES_TO_BITS(5), BIT_SIZE(tableSize)) & TABLE_SIZE_MASK(tableSize);
365#endif
366}
367
368template <>
369    inline unsigned int BitStreamHashTableImpl<10>::getBucket(const char* name, const int hashvalue, unsigned int tableSize)
370{
371#if DEBUG_BHT
372    printf ("%s_%i | hashvalue: %i | ", __FUNCTION__, 10, hashvalue);
373#endif
374#ifdef XOR_HASHVALUE
375//    return ( longHashFunction<uint64_t>((uint64_t*)name, (uint64_t*)(name+5), BYTES_TO_BITS(5), BIT_SIZE(tableSize)) ^ ~hashvalue ) & TABLE_SIZE_MASK(tableSize);
376    uint32_t temphashvalue = (*((uint32_t*)name)) ^ (*((uint32_t*)(name+4))) ^ (*((uint32_t*)(name+6)));
377    return ( longHashFunction<uint32_t>(&temphashvalue, &temphashvalue, 32, BIT_SIZE(tableSize)) ^ ~hashvalue ) & TABLE_SIZE_MASK(tableSize);
378#else
379    return longHashFunction<uint64_t>((uint64_t*)name, (uint64_t*)(name+5), BYTES_TO_BITS(5), BIT_SIZE(tableSize)) & TABLE_SIZE_MASK(tableSize);
380#endif
381}
382
383template <>
384    inline unsigned int BitStreamHashTableImpl<11>::getBucket(const char* name, const int hashvalue, unsigned int tableSize)
385{
386#if DEBUG_BHT
387    printf ("%s_%i | hashvalue: %i | ", __FUNCTION__, 11, hashvalue);
388#endif
389#ifdef XOR_HASHVALUE
390//    return ( longHashFunction<uint64_t>((uint64_t*)name, (uint64_t*)(name+4), BYTES_TO_BITS(5), BIT_SIZE(tableSize)) ^ ~hashvalue ) & TABLE_SIZE_MASK(tableSize);
391    uint32_t temphashvalue = (*((uint32_t*)name)) ^ (*((uint32_t*)(name+4))) ^ (*((uint32_t*)(name+7)));
392    return ( longHashFunction<uint32_t>(&temphashvalue, &temphashvalue, 32, BIT_SIZE(tableSize)) ^ ~hashvalue ) & TABLE_SIZE_MASK(tableSize);
393#else
394    return longHashFunction<uint64_t>((uint64_t*)name, (uint64_t*)(name+4), BYTES_TO_BITS(5), BIT_SIZE(tableSize)) & TABLE_SIZE_MASK(tableSize);
395#endif
396}
397
398template <>
399    inline unsigned int BitStreamHashTableImpl<12>::getBucket(const char* name, const int hashvalue, unsigned int tableSize)
400{
401#if DEBUG_BHT
402    printf ("%s_%i | hashvalue: %i | ", __FUNCTION__, 12, hashvalue);
403#endif
404#ifdef XOR_HASHVALUE
405//    return ( longHashFunction<uint64_t>((uint64_t*)name, (uint64_t*)(name+5), BYTES_TO_BITS(5), BIT_SIZE(tableSize)) ^ ~hashvalue ) & TABLE_SIZE_MASK(tableSize);
406    uint32_t temphashvalue = (*((uint32_t*)name)) ^ (*((uint32_t*)(name+4))) ^ (*((uint32_t*)(name+8)));
407    return ( longHashFunction<uint32_t>(&temphashvalue, &temphashvalue, 32, BIT_SIZE(tableSize)) ^ ~hashvalue ) & TABLE_SIZE_MASK(tableSize);
408#else
409    return longHashFunction<uint64_t>((uint64_t*)name, (uint64_t*)(name+5), BYTES_TO_BITS(5), BIT_SIZE(tableSize)) & TABLE_SIZE_MASK(tableSize);
410#endif
411}
412
413//#endif // BITSTREAM_HASH_TABLE_IMPL_H
Note: See TracBrowser for help on using the repository browser.