Changeset 1918 for trunk/lib/hash.hpp


Ignore:
Timestamp:
Feb 10, 2012, 10:16:53 PM (7 years ago)
Author:
ksherdy
Message:

Update hash functions.

File:
1 moved

Legend:

Unmodified
Added
Removed
  • trunk/lib/hash.hpp

    r1914 r1918  
    88using namespace std;
    99
    10 /*  Hash Functions Templated on Field Width.
     10/* WARNING:     Pad 8 bytes.
     11                Internal static casts of uint8_t * b0 to uint64_t * and deference. */
    1112
    12     slice<1> - bit slice operation
    13     FW=1 'pos' and 'lgth' args in terms of bits,
    14     e.g. pos=1,lgth=7 => begin at bit index 1 and slice symbol of length 7 bits.
    1513
    16     slice<8> - byte slice operation
    17     FW=8 'pos' and 'lgth' args in terms of bytes, i.e. byte at index 1
    18     e.g. pos=1,lgth=4 => begin at byte index position 1 and slice symbol of length 4 bytes.
     14static IDISA_ALWAYS_INLINE uint64_t gen_mask(const uint32_t hash_size);
     15static IDISA_ALWAYS_INLINE uint64_t byte_mask_hash(const uint8_t *b0, const uint32_t pos, const uint32_t hash_size);
    1916
    20     shift_hash<1> - returns a hash value based on bit index and symbol length
    21     shift_hash<8> - returns a hash value based on byte index and symbol length
    22 */
     17/* pos (bits) */
     18static IDISA_ALWAYS_INLINE uint64_t bit_offset_slice(const uint8_t * buffer, const uint32_t pos, const uint32_t slice_size);
     19static IDISA_ALWAYS_INLINE uint64_t bit_compress_hash(const uint8_t * b0, const uint8_t * b1, const uint32_t pos, const uint32_t slice_size, const uint32_t hash_size);
    2320
    24 /* WARNING: Casts uint8_t * buffer at pos to uint64_t * and deferences. Segmentation faults w/o sufficient buffer padding. */
    25 template<uint32_t FW>
    26 IDISA_ALWAYS_INLINE uint64_t mask_hash(const uint8_t * b0, const uint32_t hash_bits, const uint32_t lgth);
     21/* pos (bytes) */
     22static IDISA_ALWAYS_INLINE uint64_t byte_offset_slice(const uint8_t * buffer, const uint32_t pos, const uint32_t slice_size);
     23static IDISA_ALWAYS_INLINE uint64_t byte_compress_hash(const uint8_t * b0, const uint32_t pos, const uint32_t slice_size, const uint32_t hash_size);
    2724
    28 template<uint32_t FW>
    29 IDISA_ALWAYS_INLINE uint64_t slice(const uint8_t * buffer, const uint32_t pos, const uint32_t lgth);
    30 
    31 template<uint32_t FW>
    32 IDISA_ALWAYS_INLINE uint64_t shift_hash(const uint8_t * b0, const uint8_t * b1, const uint32_t pos, const uint32_t hash_bits, const uint32_t lgth);
    33 
    34 template<>
    35 IDISA_ALWAYS_INLINE uint64_t slice<1>(const uint8_t * buffer, const uint32_t pos, const uint32_t lgth) {
    36     assert(lgth > 0 && lgth <= 64);
    37 
    38     uint64_t shift = *((uint64_t *)(buffer + (pos/8))) >> (pos & 8-1);
    39     uint64_t slice_mask = UINT64_MAX >> ((sizeof(uint64_t)*8 - lgth));
    40 #ifndef NDEBUG
    41     print_register<uint64_t>("shift", shift);
    42     print_register<uint64_t>("slice mask", slice_mask);
    43 #endif
    44     return shift & slice_mask;
    45 }
    46 
    47 template<>
    48 IDISA_ALWAYS_INLINE uint64_t slice<8>(const uint8_t * buffer, const uint32_t pos, const uint32_t lgth) {
    49     assert(lgth > 0 && lgth <= 8);
    50 
    51     uint64_t shift = *((uint64_t *)(buffer+pos));
    52     uint64_t slice_mask = UINT64_MAX >> ((sizeof(uint64_t) - lgth)*8);
    53 #ifndef NDEBUG
    54     print_register<uint64_t>("shift", shift);
    55     print_register<uint64_t>("slice mask", slice_mask);
    56 #endif
    57     return shift & slice_mask;
    58 }
    59 
    60 template<uint32_t FW>
    61 IDISA_ALWAYS_INLINE uint64_t mask_hash(const uint8_t * b0, const uint32_t hash_bits, const uint32_t lgth) {
    62     assert(lgth > 0 && lgth*FW <= 64);
    63     assert((lgth*FW) > hash_bits);
     25static IDISA_ALWAYS_INLINE uint64_t gen_mask(const uint32_t hash_size) {
     26    assert(hash_size > 0);
    6427
    6528    const uint64_t ONE = 1;
    66     uint64_t mask = (ONE << hash_bits) - ONE;
     29    uint64_t mask = (ONE << hash_size) - ONE;
    6730#ifndef NDEBUG
    6831    print_register<uint64_t>("mask", mask);
    6932#endif
    70     return *((uint64_t *)b0) & mask;
     33    return mask;
    7134}
    7235
    73 template<uint32_t FW>
    74 IDISA_ALWAYS_INLINE uint64_t shift_hash(const uint8_t * b0, const uint8_t * b1, const uint32_t pos, const uint32_t hash_bits, const uint32_t lgth) {
    75     assert(lgth > 0 && lgth*FW <= 64);
    76     assert((lgth*FW) > hash_bits);
     36static IDISA_ALWAYS_INLINE uint64_t byte_mask_hash(const uint8_t *b0, const uint32_t pos, const uint32_t hash_size) {
     37    uint64_t mask = gen_mask(hash_size);
     38    return *((uint64_t *)(b0 + pos)) & mask;
     39}
    7740
    78     uint64_t x0 = slice<FW>(b0,pos,lgth);
    79     uint64_t x1 = slice<FW>(b1,pos,lgth);
    80     const uint64_t ONE = 1;
    81     uint64_t mask = (ONE << hash_bits) - ONE;
     41/* pos (bits) */
     42static IDISA_ALWAYS_INLINE uint64_t bit_offset_slice(const uint8_t * buffer, const uint32_t pos, const uint32_t slice_size) {
     43    assert(slice_size > 0 && slice_size <= 64);
     44
     45    uint64_t shift = *((uint64_t *)(buffer + (pos/8))) >> (pos & 8-1);
     46    uint64_t mask = gen_mask(slice_size);
     47    uint64_t r = shift & mask;
    8248
    8349#ifndef NDEBUG
    84     cout << "Position:  " << pos << endl;
    85     cout << "Symbol Lgth: " << lgth << endl;
    86     cout << "Hash Bits:   " << hash_bits << endl;
     50    print_register<uint64_t>("shift", shift);
     51    print_register<uint64_t>("mask", mask);
     52#endif
    8753
    88     uint64_t masked_x0 = x0 & mask;
    89     uint64_t shift_masked_x1 = (x1 >> ((lgth*FW) - hash_bits)) & mask;
     54    return r;
     55}
     56
     57/* pos (bytes) */
     58static IDISA_ALWAYS_INLINE uint64_t byte_offset_slice(const uint8_t * buffer, const uint32_t pos, const uint32_t slice_size) {
     59    assert(slice_size > 0 && slice_size <= 64);
     60
     61    uint64_t shift = *((uint64_t *)(buffer+pos));
     62    uint64_t mask = gen_mask(slice_size);
     63    uint64_t r = shift & mask;
     64
     65#ifndef NDEBUG
     66    print_register<uint64_t>("shift", shift);
     67    print_register<uint64_t>("mask", mask);
     68#endif
     69    return r;
     70}
     71
     72static IDISA_ALWAYS_INLINE uint64_t bit_compress_hash(const uint8_t * b0, const uint8_t * b1, const uint32_t pos, const uint32_t slice_size, const uint32_t hash_size) {
     73    assert(slice_size > 0 && slice_size <= 64);
     74    assert(slice_size >= hash_size); /* negative shifts undefined in C standard, (x1 >> shift) */
     75
     76#ifndef NDEBUG
     77    cout << endl;
     78    cout << "Stream pos (bit):  " << pos << endl;
     79    cout << "Slice size (bits): "  << slice_size << endl;
     80    cout << "Hash size (bits):   " << hash_size << endl;
     81#endif
     82
     83    uint64_t x0 = bit_offset_slice(b0,pos,slice_size);
     84    uint64_t x1 = bit_offset_slice(b1,pos,slice_size);
     85    uint64_t shift = (slice_size - hash_size);
     86    uint64_t mask = gen_mask(hash_size);
     87    uint64_t r = (x0 ^ (x1 >> shift)) & mask;
     88
     89#ifndef NDEBUG
    9090    print_register<uint64_t>("b0", *(uint64_t *)b0);
    9191    print_register<uint64_t>("b1", *(uint64_t *)b1);
    9292    print_register<uint64_t>("x0", x0);
    9393    print_register<uint64_t>("x1", x1);
    94     print_register<uint64_t>("mask", mask);
    95     print_register<uint64_t>("masked x0", masked_x0);
    96     print_register<uint64_t>("shift masked x1", shift_masked_x1);
     94    print_register<uint64_t>("shift x1", (x1 >> shift));
     95    print_register<uint64_t>("r", r);
    9796#endif
    9897
    99     return (x0 ^ (x1 >> ((lgth*FW) - hash_bits))) & mask;
     98    return r;
    10099}
    101100
     101static IDISA_ALWAYS_INLINE uint64_t byte_compress_hash(const uint8_t * b0, const uint32_t pos, const uint32_t slice_size, const uint32_t hash_size) {
     102    assert(slice_size > 0 && slice_size <= 64);
     103    assert(slice_size >= hash_size); // negative shifts undefined in C standard
    102104
     105#ifndef NDEBUG
     106    cout << endl;
     107    cout << "Stream pos (byte):  " << pos << endl;
     108    cout << "Slice size (bits): "  << slice_size << endl;
     109    cout << "Hash size (bits):   " << hash_size << endl;
     110#endif
    103111
     112    uint64_t x0 = *((uint64_t *)(b0+pos));
     113    uint64_t shift = (slice_size - hash_size);  // negative shifts undefined in C standard
     114    uint64_t mask = gen_mask(hash_size);
     115    uint64_t r = (x0 ^ (x0 >> shift)) & mask;
     116
     117#ifndef NDEBUG
     118    print_register<uint64_t>("b0", *(uint64_t *)(b0));
     119    print_register<uint64_t>("x0", x0);
     120    print_register<uint64_t>("x0 >> shift", x0>>shift);
     121    print_register<uint64_t>("r", r);
     122#endif
     123
     124    return r;
     125}
    104126
    105127#endif // BYTE_UTIL_HPP
    106128
     129/*
     130static IDISA_ALWAYS_INLINE uint64_t bit_expand_hash(const uint8_t * b0, const uint8_t * b1, const uint32_t pos, const uint32_t slice_size, const uint32_t hash_size);
     131static IDISA_ALWAYS_INLINE uint64_t bit_expand_hash(const uint8_t * b0, const uint8_t * b1, const uint32_t pos, const uint32_t slice_size, const uint32_t hash_size) {
     132    assert(slice_size > 0 && slice_size <= 64);
     133    //assert(slice_size <= hash_size);
     134
     135    uint64_t x0 = bit_offset_slice(b0,pos,slice_size);
     136    uint64_t x1 = bit_offset_slice(b1,pos,slice_size);
     137    uint64_t mask = gen_mask(hash_size);
     138
     139    assert(x0 != x1);
     140
     141    uint64_t t = x0 ^ x1;
     142    uint64_t r = t;
     143    int32_t shift = slice_size;
     144
     145    print_register<uint64_t>("t", t);
     146    print_register<uint64_t>("r", r);
     147
     148    while(shift > 0) {
     149
     150#ifndef NDEBUG
     151    cout << "Stream pos (bit):  " << pos << endl;
     152    cout << "Symbol lgth (bits): " << slice_size << endl;
     153    cout << "Hash size (bits):   " << hash_size << endl;
     154    cout << "Shift (bits): " << shift << endl;
     155
     156    print_register<uint64_t>("b0", *(uint64_t *)b0);
     157    print_register<uint64_t>("b1", *(uint64_t *)b1);
     158    print_register<uint64_t>("x0", x0);
     159    print_register<uint64_t>("x1", x1);
     160    print_register<uint64_t>("r", r);
     161#endif
     162        r = r | (r << (uint32_t)shift);
     163        shift -= slice_size;
     164        print_register<uint64_t>("r", r);
     165    }
     166
     167    return r & mask;
     168}
     169*/
Note: See TracChangeset for help on using the changeset viewer.