Changeset 1918 for trunk


Ignore:
Timestamp:
Feb 10, 2012, 10:16:53 PM (7 years ago)
Author:
ksherdy
Message:

Update hash functions.

Location:
trunk/lib
Files:
1 added
1 deleted
1 edited
2 moved

Legend:

Unmodified
Added
Removed
  • trunk/lib/hash.hpp

    r1914 r1918  
    88using namespace std;
    99
    10 /*  Hash Functions Templated on Field Width.
     10/* WARNING:     Pad 8 bytes.
     11                Internal static casts of uint8_t * b0 to uint64_t * and deference. */
    1112
    12     slice<1> - bit slice operation
    13     FW=1 'pos' and 'lgth' args in terms of bits,
    14     e.g. pos=1,lgth=7 => begin at bit index 1 and slice symbol of length 7 bits.
    1513
    16     slice<8> - byte slice operation
    17     FW=8 'pos' and 'lgth' args in terms of bytes, i.e. byte at index 1
    18     e.g. pos=1,lgth=4 => begin at byte index position 1 and slice symbol of length 4 bytes.
     14static IDISA_ALWAYS_INLINE uint64_t gen_mask(const uint32_t hash_size);
     15static IDISA_ALWAYS_INLINE uint64_t byte_mask_hash(const uint8_t *b0, const uint32_t pos, const uint32_t hash_size);
    1916
    20     shift_hash<1> - returns a hash value based on bit index and symbol length
    21     shift_hash<8> - returns a hash value based on byte index and symbol length
    22 */
     17/* pos (bits) */
     18static IDISA_ALWAYS_INLINE uint64_t bit_offset_slice(const uint8_t * buffer, const uint32_t pos, const uint32_t slice_size);
     19static IDISA_ALWAYS_INLINE uint64_t bit_compress_hash(const uint8_t * b0, const uint8_t * b1, const uint32_t pos, const uint32_t slice_size, const uint32_t hash_size);
    2320
    24 /* WARNING: Casts uint8_t * buffer at pos to uint64_t * and deferences. Segmentation faults w/o sufficient buffer padding. */
    25 template<uint32_t FW>
    26 IDISA_ALWAYS_INLINE uint64_t mask_hash(const uint8_t * b0, const uint32_t hash_bits, const uint32_t lgth);
     21/* pos (bytes) */
     22static IDISA_ALWAYS_INLINE uint64_t byte_offset_slice(const uint8_t * buffer, const uint32_t pos, const uint32_t slice_size);
     23static IDISA_ALWAYS_INLINE uint64_t byte_compress_hash(const uint8_t * b0, const uint32_t pos, const uint32_t slice_size, const uint32_t hash_size);
    2724
    28 template<uint32_t FW>
    29 IDISA_ALWAYS_INLINE uint64_t slice(const uint8_t * buffer, const uint32_t pos, const uint32_t lgth);
    30 
    31 template<uint32_t FW>
    32 IDISA_ALWAYS_INLINE uint64_t shift_hash(const uint8_t * b0, const uint8_t * b1, const uint32_t pos, const uint32_t hash_bits, const uint32_t lgth);
    33 
    34 template<>
    35 IDISA_ALWAYS_INLINE uint64_t slice<1>(const uint8_t * buffer, const uint32_t pos, const uint32_t lgth) {
    36     assert(lgth > 0 && lgth <= 64);
    37 
    38     uint64_t shift = *((uint64_t *)(buffer + (pos/8))) >> (pos & 8-1);
    39     uint64_t slice_mask = UINT64_MAX >> ((sizeof(uint64_t)*8 - lgth));
    40 #ifndef NDEBUG
    41     print_register<uint64_t>("shift", shift);
    42     print_register<uint64_t>("slice mask", slice_mask);
    43 #endif
    44     return shift & slice_mask;
    45 }
    46 
    47 template<>
    48 IDISA_ALWAYS_INLINE uint64_t slice<8>(const uint8_t * buffer, const uint32_t pos, const uint32_t lgth) {
    49     assert(lgth > 0 && lgth <= 8);
    50 
    51     uint64_t shift = *((uint64_t *)(buffer+pos));
    52     uint64_t slice_mask = UINT64_MAX >> ((sizeof(uint64_t) - lgth)*8);
    53 #ifndef NDEBUG
    54     print_register<uint64_t>("shift", shift);
    55     print_register<uint64_t>("slice mask", slice_mask);
    56 #endif
    57     return shift & slice_mask;
    58 }
    59 
    60 template<uint32_t FW>
    61 IDISA_ALWAYS_INLINE uint64_t mask_hash(const uint8_t * b0, const uint32_t hash_bits, const uint32_t lgth) {
    62     assert(lgth > 0 && lgth*FW <= 64);
    63     assert((lgth*FW) > hash_bits);
     25static IDISA_ALWAYS_INLINE uint64_t gen_mask(const uint32_t hash_size) {
     26    assert(hash_size > 0);
    6427
    6528    const uint64_t ONE = 1;
    66     uint64_t mask = (ONE << hash_bits) - ONE;
     29    uint64_t mask = (ONE << hash_size) - ONE;
    6730#ifndef NDEBUG
    6831    print_register<uint64_t>("mask", mask);
    6932#endif
    70     return *((uint64_t *)b0) & mask;
     33    return mask;
    7134}
    7235
    73 template<uint32_t FW>
    74 IDISA_ALWAYS_INLINE uint64_t shift_hash(const uint8_t * b0, const uint8_t * b1, const uint32_t pos, const uint32_t hash_bits, const uint32_t lgth) {
    75     assert(lgth > 0 && lgth*FW <= 64);
    76     assert((lgth*FW) > hash_bits);
     36static IDISA_ALWAYS_INLINE uint64_t byte_mask_hash(const uint8_t *b0, const uint32_t pos, const uint32_t hash_size) {
     37    uint64_t mask = gen_mask(hash_size);
     38    return *((uint64_t *)(b0 + pos)) & mask;
     39}
    7740
    78     uint64_t x0 = slice<FW>(b0,pos,lgth);
    79     uint64_t x1 = slice<FW>(b1,pos,lgth);
    80     const uint64_t ONE = 1;
    81     uint64_t mask = (ONE << hash_bits) - ONE;
     41/* pos (bits) */
     42static IDISA_ALWAYS_INLINE uint64_t bit_offset_slice(const uint8_t * buffer, const uint32_t pos, const uint32_t slice_size) {
     43    assert(slice_size > 0 && slice_size <= 64);
     44
     45    uint64_t shift = *((uint64_t *)(buffer + (pos/8))) >> (pos & 8-1);
     46    uint64_t mask = gen_mask(slice_size);
     47    uint64_t r = shift & mask;
    8248
    8349#ifndef NDEBUG
    84     cout << "Position:  " << pos << endl;
    85     cout << "Symbol Lgth: " << lgth << endl;
    86     cout << "Hash Bits:   " << hash_bits << endl;
     50    print_register<uint64_t>("shift", shift);
     51    print_register<uint64_t>("mask", mask);
     52#endif
    8753
    88     uint64_t masked_x0 = x0 & mask;
    89     uint64_t shift_masked_x1 = (x1 >> ((lgth*FW) - hash_bits)) & mask;
     54    return r;
     55}
     56
     57/* pos (bytes) */
     58static IDISA_ALWAYS_INLINE uint64_t byte_offset_slice(const uint8_t * buffer, const uint32_t pos, const uint32_t slice_size) {
     59    assert(slice_size > 0 && slice_size <= 64);
     60
     61    uint64_t shift = *((uint64_t *)(buffer+pos));
     62    uint64_t mask = gen_mask(slice_size);
     63    uint64_t r = shift & mask;
     64
     65#ifndef NDEBUG
     66    print_register<uint64_t>("shift", shift);
     67    print_register<uint64_t>("mask", mask);
     68#endif
     69    return r;
     70}
     71
     72static IDISA_ALWAYS_INLINE uint64_t bit_compress_hash(const uint8_t * b0, const uint8_t * b1, const uint32_t pos, const uint32_t slice_size, const uint32_t hash_size) {
     73    assert(slice_size > 0 && slice_size <= 64);
     74    assert(slice_size >= hash_size); /* negative shifts undefined in C standard, (x1 >> shift) */
     75
     76#ifndef NDEBUG
     77    cout << endl;
     78    cout << "Stream pos (bit):  " << pos << endl;
     79    cout << "Slice size (bits): "  << slice_size << endl;
     80    cout << "Hash size (bits):   " << hash_size << endl;
     81#endif
     82
     83    uint64_t x0 = bit_offset_slice(b0,pos,slice_size);
     84    uint64_t x1 = bit_offset_slice(b1,pos,slice_size);
     85    uint64_t shift = (slice_size - hash_size);
     86    uint64_t mask = gen_mask(hash_size);
     87    uint64_t r = (x0 ^ (x1 >> shift)) & mask;
     88
     89#ifndef NDEBUG
    9090    print_register<uint64_t>("b0", *(uint64_t *)b0);
    9191    print_register<uint64_t>("b1", *(uint64_t *)b1);
    9292    print_register<uint64_t>("x0", x0);
    9393    print_register<uint64_t>("x1", x1);
    94     print_register<uint64_t>("mask", mask);
    95     print_register<uint64_t>("masked x0", masked_x0);
    96     print_register<uint64_t>("shift masked x1", shift_masked_x1);
     94    print_register<uint64_t>("shift x1", (x1 >> shift));
     95    print_register<uint64_t>("r", r);
    9796#endif
    9897
    99     return (x0 ^ (x1 >> ((lgth*FW) - hash_bits))) & mask;
     98    return r;
    10099}
    101100
     101static IDISA_ALWAYS_INLINE uint64_t byte_compress_hash(const uint8_t * b0, const uint32_t pos, const uint32_t slice_size, const uint32_t hash_size) {
     102    assert(slice_size > 0 && slice_size <= 64);
     103    assert(slice_size >= hash_size); // negative shifts undefined in C standard
    102104
     105#ifndef NDEBUG
     106    cout << endl;
     107    cout << "Stream pos (byte):  " << pos << endl;
     108    cout << "Slice size (bits): "  << slice_size << endl;
     109    cout << "Hash size (bits):   " << hash_size << endl;
     110#endif
    103111
     112    uint64_t x0 = *((uint64_t *)(b0+pos));
     113    uint64_t shift = (slice_size - hash_size);  // negative shifts undefined in C standard
     114    uint64_t mask = gen_mask(hash_size);
     115    uint64_t r = (x0 ^ (x0 >> shift)) & mask;
     116
     117#ifndef NDEBUG
     118    print_register<uint64_t>("b0", *(uint64_t *)(b0));
     119    print_register<uint64_t>("x0", x0);
     120    print_register<uint64_t>("x0 >> shift", x0>>shift);
     121    print_register<uint64_t>("r", r);
     122#endif
     123
     124    return r;
     125}
    104126
    105127#endif // BYTE_UTIL_HPP
    106128
     129/*
     130static IDISA_ALWAYS_INLINE uint64_t bit_expand_hash(const uint8_t * b0, const uint8_t * b1, const uint32_t pos, const uint32_t slice_size, const uint32_t hash_size);
     131static IDISA_ALWAYS_INLINE uint64_t bit_expand_hash(const uint8_t * b0, const uint8_t * b1, const uint32_t pos, const uint32_t slice_size, const uint32_t hash_size) {
     132    assert(slice_size > 0 && slice_size <= 64);
     133    //assert(slice_size <= hash_size);
     134
     135    uint64_t x0 = bit_offset_slice(b0,pos,slice_size);
     136    uint64_t x1 = bit_offset_slice(b1,pos,slice_size);
     137    uint64_t mask = gen_mask(hash_size);
     138
     139    assert(x0 != x1);
     140
     141    uint64_t t = x0 ^ x1;
     142    uint64_t r = t;
     143    int32_t shift = slice_size;
     144
     145    print_register<uint64_t>("t", t);
     146    print_register<uint64_t>("r", r);
     147
     148    while(shift > 0) {
     149
     150#ifndef NDEBUG
     151    cout << "Stream pos (bit):  " << pos << endl;
     152    cout << "Symbol lgth (bits): " << slice_size << endl;
     153    cout << "Hash size (bits):   " << hash_size << endl;
     154    cout << "Shift (bits): " << shift << endl;
     155
     156    print_register<uint64_t>("b0", *(uint64_t *)b0);
     157    print_register<uint64_t>("b1", *(uint64_t *)b1);
     158    print_register<uint64_t>("x0", x0);
     159    print_register<uint64_t>("x1", x1);
     160    print_register<uint64_t>("r", r);
     161#endif
     162        r = r | (r << (uint32_t)shift);
     163        shift -= slice_size;
     164        print_register<uint64_t>("r", r);
     165    }
     166
     167    return r & mask;
     168}
     169*/
  • trunk/lib/test/Makefile

    r1913 r1918  
    1919        $(CC) -o byte_compare_test byte_compare_test.cpp $(AFLAGS)
    2020
    21 hash_util_test: hash_util_test.cpp ../hash_util.hpp
    22         $(CC) -o hash_util_test hash_util_test.cpp $(AFLAGS)
     21hash_test:      hash_test.cpp ../hash.hpp
     22        $(CC) -o hash_test hash_test.cpp $(AFLAGS)
    2323
    2424clean:
    25         rm -f byte_compare_generator byte_compare_test byte_compare_test.cpp hash_util_test
     25        rm -f byte_compare_generator byte_compare_test byte_compare_test.cpp hash_test
  • trunk/lib/test/hash_test.cpp

    r1915 r1918  
    1 #include "../hash_util.hpp"
     1#include "../hash.hpp"
    22#include <iostream>
    33using namespace std;
     
    1313    uint32_t hash_size = 0;
    1414
     15    uint64_t a = 1;
     16    uint64_t b = 2;
     17
    1518    print_register("b0", *(uint64_t *) b0);
    1619    print_register("b1", *(uint64_t *) b1);
     
    1821    print_register("b3", *(uint64_t *) b3);
    1922
    20     for(uint32_t mask_bits=1;mask_bits<64;mask_bits++) {
    21         r = mask_hash<8>((uint8_t *) &b3, mask_bits, 8);
    22         print_register("r", r);
    23     }
    24 
    25     // Length 8 (bits), Expect 41
     23    // Expect 41
    2624    /*
    27     for(uint32_t idx=0;idx<64;idx+=8) {
    28         r = slice<1>((uint8_t *) &b2,idx,8);
     25    for(uint32_t pos=0;pos<64;pos+=8) {
     26        r = bit_offset_slice((uint8_t *) &b2,pos,8);
    2927        print_register("r", r);
    3028    }
    3129    */
    3230
    33     // Length 16 (bits), Expect 4141
     31    // Expect 4141
    3432    /*
    35     for(uint32_t idx=0;idx<64-8;idx+=8) {
    36         r = slice<1>((uint8_t *) &b2,idx,16);
     33    for(uint32_t pos=0;pos<64-8;pos+=8) {
     34        r = bit_offset_slice((uint8_t *) &b2,pos,16);
    3735        print_register("r", r);
    3836    }
    3937    */
    4038
    41     // Length 8 (bits), idx 1,9,..., Expect A0
     39    // Length 8 (bits), pos 1,9,..., Expect A0
    4240    /*
    43     for(uint32_t idx=1;idx<64-8;idx+=8) {
    44         r = slice<1>((uint8_t *) &b2,idx,8);
     41    for(uint32_t pos=1;pos<64-8;pos+=8) {
     42        r = bit_offset_slice((uint8_t *) &b2,pos,8);
    4543        print_register("r", r);
    4644    }
    4745    */
    4846
    49     // Length 16 (bits), idx 1,9,..., Expect A0A0
     47    // Length 16 (bits), pos 1,9,..., Expect A0A0
    5048    /*
    51     for(uint32_t idx=1;idx<64-16;idx+=8) {
    52         r = slice<1>((uint8_t *) &b2,idx,16);
     49    for(uint32_t pos=1;pos<64-16;pos+=8) {
     50        r = bit_offset_slice((uint8_t *) &b2,pos,16);
    5351        print_register("r", r);
    5452    }
    5553    */
    5654
    57     // Length 8 (bits), idx 2,10,..., Expect 50
     55    // Length 8 (bits), pos 2,10,..., Expect 50
    5856    /*
    59     for(uint32_t idx=2;idx<64-8;idx+=8) {
    60         r = slice<1>((uint8_t *) &b2,idx,8);
     57    for(uint32_t pos=2;pos<64-8;pos+=8) {
     58        r = bit_offset_slice((uint8_t *) &b2,pos,8);
    6159        print_register("r", r);
    6260    }
    6361    */
    6462
    65     // Length 16 (bits), idx 2,10,..., Expect 5050
     63    // Length 16 (bits), pos 2,10,..., Expect 5050
    6664    /*
    67     for(uint32_t idx=2;idx<64-16;idx+=8) {
    68         r = slice<1>((uint8_t *) &b2,idx,16);
     65    for(uint32_t pos=2;pos<64-16;pos+=8) {
     66        r = bit_offset_slice((uint8_t *) &b2,pos,16);
    6967        print_register("r", r);
    7068    }
    7169    */
    7270
    73     // Length 8 (bits), idx 3,11,..., Expect 28
     71    // Length 8 (bits), pos 3,11,..., Expect 28
    7472    /*
    75     for(uint32_t idx=3;idx<64-8;idx+=8) {
    76         r = slice<1>((uint8_t *) &b2,idx,8);
     73    for(uint32_t pos=3;pos<64-8;pos+=8) {
     74        r = bit_offset_slice((uint8_t *) &b2,pos,8);
    7775        print_register("r", r);
    7876    }
    7977    */
    8078
    81     // Length 16 (bits), idx 3,11,..., Expect 2828
     79    // Length 16 (bits), pos 3,11,..., Expect 2828
    8280    /*
    83     for(uint32_t idx=3;idx<64-16;idx+=8) {
    84         r = slice<1>((uint8_t *) &b2,idx,16);
     81    for(uint32_t pos=3;pos<64-16;pos+=8) {
     82        r = bit_offset_slice((uint8_t *) &b2,pos,16);
    8583        print_register("r", r);
    8684    }
    8785    */
    8886
    89     // Length 8 (bits), idx 3,11,..., Expect 05
     87    // Length 8 (bits), pos 3,11,..., Expect 05
    9088    /*
    91     for(uint32_t idx=6;idx<64-8;idx+=8) {
    92         r = slice<1>((uint8_t *) &b2,idx,8);
     89    for(uint32_t pos=6;pos<64-8;pos+=8) {
     90        r = bit_offset_slice((uint8_t *) &b2,pos,8);
    9391        print_register("r", r);
    9492    }
    9593    */
    9694
    97     // Length 16 (bits), idx 3,11,..., Expect 0505
     95    // Length 16 (bits), pos 3,11,..., Expect 0505
    9896    /*
    99     for(uint32_t idx=6;idx<64-16;idx+=8) {
    100         r = slice<1>((uint8_t *) &b2,idx,16);
     97    for(uint32_t pos=6;pos<64-16;pos+=8) {
     98        r = bit_offset_slice((uint8_t *) &b2,pos,16);
    10199        print_register("r", r);
    102100    }
    103101    */
    104102
    105     // Length 8 (bits), Expect shift_hash<8>, shift_hash<1> to return equal values.
     103    /*
     104    for(uint32_t mask_bits=1;mask_bits<64;mask_bits++) {
     105        r = byte_mask_hash((uint8_t *) &b0, 0, mask_bits);
     106        print_register("r", r);
     107    }
     108    */
     109
     110    // Length 8 (bits)
    106111    /*
    107112    hash_size = 8;
    108     for(uint32_t idx=0;idx<sizeof(uint64_t);idx++) {
     113    for(uint32_t pos=0;pos<sizeof(uint64_t);pos++) {
    109114
    110         // shift_hash<1>
    111         r = shift_hash<1>((uint8_t *) &b0,(uint8_t *) &b2, idx*8, hash_size, 8);
    112         cout << dec << "shift_hash<1> = " << r << endl << endl;
    113         cout << endl << endl;
    114 
    115         r = shift_hash<8>((uint8_t *) &b0,(uint8_t *) &b2, idx, hash_size, 1);
    116         cout << dec << "shift_hash<8> = " << r << endl << endl;
     115        r = bit_compress_hash((uint8_t *) &b0,(uint8_t *) &b2, pos*8, 8, hash_size);
     116        cout << dec << "bit_compress_hash = " << r << endl << endl;
    117117        cout << endl << endl;
    118118
     
    120120    */
    121121
    122     // Length 16 (bits), Expect shift_hash<8>, shift_hash<1> to return equal values.
    123122    /*
    124     hash_size = 16;
    125     for(uint32_t idx=0;idx<sizeof(uint64_t)-1;idx++) {
     123    uint8_t pos, slice;
     124    hash_size = 8;
    126125
    127         // shift_hash<1>
    128         r = shift_hash<1>((uint8_t *) &b1,(uint8_t *) &b2, idx*8, hash_size, 16);
    129         cout << dec << "shift_hash<1> = " << r << endl << endl;
    130         cout << endl << endl;
     126    slice = 16;
     127    pos = 0;
     128    r = byte_compress_hash((uint8_t *) &b0, pos, slice, hash_size);
    131129
    132         r = shift_hash<8>((uint8_t *) &b1,(uint8_t *) &b2, idx, hash_size, 2);
    133         cout << dec << "shift_hash<8> = " << r << endl << endl;
    134         cout << endl << endl;
     130    pos = 1;
     131    r = byte_compress_hash((uint8_t *) &b0, pos, slice, hash_size);
    135132
    136     }
    137     */
     133    slice = 24;
     134    pos = 0;
     135    r = byte_compress_hash((uint8_t *) &b0, pos, slice, hash_size);
    138136
    139     // Some shift_hash<1> cases.
    140     /*
    141     hash_size = 9;
    142     for(uint32_t lgth=16;lgth<sizeof(uint64_t)*8+1;lgth+=16) {
    143         for(uint32_t idx=1;idx<sizeof(uint64_t)*8-lgth+1;idx++) {
    144              r = shift_hash<1>((uint8_t *) &b0,(uint8_t *) &b1, idx, hash_size, lgth);
    145              cout << dec << "shift_hash<1> = " << r << endl << endl;
    146              cout  << endl;
    147         }
    148     }
    149     */
     137    pos = 1;
     138    r = byte_compress_hash((uint8_t *) &b0, pos, slice, hash_size);
    150139
    151     // Some shift_hash<8> cases.
    152     /*
    153     hash_size = 8;
    154     for(uint32_t lgth=2;lgth<sizeof(uint64_t)+1;lgth++) {
    155         for(uint32_t idx=0;idx<sizeof(uint64_t)-lgth+1;idx++) {
    156              r = shift_hash<8>((uint8_t *) &b0,(uint8_t *) &b1, idx, hash_size, lgth);
    157              cout << dec << "shift_hash<8> = " << r << endl;
    158              cout << endl << endl;
    159         }
    160     }
     140    slice = 32;
     141
     142    pos = 0;
     143    r = byte_compress_hash((uint8_t *) &b0, pos, slice, hash_size);
     144
     145    pos = 1;
     146    r = byte_compress_hash((uint8_t *) &b0, pos, slice, hash_size);
     147
     148    pos = 2;
     149    r = byte_compress_hash((uint8_t *) &b0, pos, slice, hash_size);
    161150    */
    162151
Note: See TracChangeset for help on using the changeset viewer.