Changeset 2106


Ignore:
Timestamp:
May 16, 2012, 3:02:58 PM (7 years ago)
Author:
ksherdy
Message:

Added bit / byte strategy. Added length test to hash table.

Location:
trunk/symbol_table
Files:
1 added
11 edited
2 moved

Legend:

Unmodified
Added
Removed
  • trunk/symbol_table/Makefile

    r2103 r2106  
    3030LOGBASE2_GROUP_STRMS = logbase2_group_strms.py
    3131DIV2_LOGBASE2_GROUP_STRMS = div2_logbase2_group_strms.py
    32 SINGLE_GROUP_STRMS = single_group_strms.py
     32BIT_BYTE_GROUP_STRMS = bit_byte_group_strms.py
    3333
    3434GROUP_PREFIX=@group_strms_
     
    5151        python $(PABLO_COMPILER) $(MARKER_PABLO_SRC) -t $(MAIN_OUTFILE) -l $(MARKER_PREFIX) -o $(MAIN_OUTFILE) $(PABLO_ADD_DEBUG)
    5252
    53 single: markers hash single_group src/main.cpp
     53bit_byte: markers hash bit_byte_group src/main.cpp
    5454        python $(PABLO_COMPILER) $(MARKER_PABLO_SRC) -t $(MAIN_OUTFILE) -l $(MARKER_PREFIX) -o $(MAIN_OUTFILE) $(PABLO_ADD_DEBUG)
    5555
     
    7676        python $(PABLO_COMPILER) $(DIV2_LOGBASE2_GROUP_STRMS) -t $(MAIN_TEMPLATE) -l $(GROUP_PREFIX) -o $(MAIN_OUTFILE) $(PABLO_ADD_DEBUG)
    7777
    78 single_group: single_group_strms.py group_strms_template.hpp main_template.cpp
    79         python $(PABLO_COMPILER) $(SINGLE_GROUP_STRMS) -t $(GROUP_TEMPLATE) -l $(GROUP_PREFIX) -o $(GROUP_OUTFILE) $(PABLO_ADD_DEBUG)
    80         python $(PABLO_COMPILER) $(SINGLE_GROUP_STRMS) -t $(MAIN_TEMPLATE) -l $(GROUP_PREFIX) -o $(MAIN_OUTFILE) $(PABLO_ADD_DEBUG)
     78bit_byte_group: bit_byte_group_strms.py group_strms_template.hpp main_template.cpp
     79        python $(PABLO_COMPILER) $(BIT_BYTE_GROUP_STRMS) -t $(GROUP_TEMPLATE) -l $(GROUP_PREFIX) -o $(GROUP_OUTFILE) $(PABLO_ADD_DEBUG)
     80        python $(PABLO_COMPILER) $(BIT_BYTE_GROUP_STRMS) -t $(MAIN_TEMPLATE) -l $(GROUP_PREFIX) -o $(MAIN_OUTFILE) $(PABLO_ADD_DEBUG)
    8181
    8282clean:
  • trunk/symbol_table/bit_byte_group_strms.py

    r2103 r2106  
    55        starts = 0
    66        follows = 0
    7         follows_0 = 0
     7        follows_7 = 0 # bit
     8        follows_0 = 0 # byte
    89
    910def Gen_lgth_groups(groups):
    10         groups.follows_0 = groups.follows
     11
     12        # Groups symbols of length 1 to length 7
     13
     14        follows_gt_0 = groups.follows
     15
     16        follows_mask_1_2 = pablo.Advance(groups.starts) | pablo.Advance(pablo.Advance(groups.starts))
     17
     18        follows_mask_3_4 = pablo.Advance(pablo.Advance(follows_mask_1_2))
     19        follows_mask_5_6 = pablo.Advance(pablo.Advance(follows_mask_3_4))
     20        follows_mask_6_7 = pablo.Advance(follows_mask_5_6)
     21        follows_mask_1_7 = follows_mask_1_2 | follows_mask_3_4 | follows_mask_5_6 | follows_mask_6_7
     22
     23        # Naive Advance
     24        groups.follows_7 = follows_gt_0 & follows_mask_1_7
     25
     26        # Advance 32 and Interpose
     27        #temp32 = pablo.Advance32 (follows_mask_1_7)
     28        #groups.follows_16 = follows_gt_8 & interpose32 (follows_mask_1_7, temp32, 7)
     29
     30        follows_gt_7 = follows_gt_0 &~ groups.follows_7
     31
     32        # Groups symbols of length greater than 7
     33        groups.follows_0 = follows_gt_7
     34
    1135
    1236def Main(groups):
  • trunk/symbol_table/demo_strms.py

    r2103 r2106  
    44import logbase2_group_strms
    55import div2_logbase2_group_strms
     6import bit_byte_group_strms
    67import sys
    78
     
    9798        ('groups.follows_0', bitstream2string(groups.follows_0, lgth))])
    9899
     100def Demo_bit_byte(u8data, basis_bits):
     101
     102    lgth = len(u8data)
     103
     104    markers = marker_strms.Markers()
     105    marker_strms.Classify_markers(basis_bits, markers)
     106    marker_strms.Generate_markers(markers)
     107
     108    groups = bit_byte_group_strms.Groups()
     109    groups.starts = markers.starts
     110    groups.follows = markers.follows
     111    bit_byte_group_strms.Gen_lgth_groups(groups)
     112
     113    print_aligned_streams([('Input Data', u8data),
     114        ('markers.spans', bitstream2string(markers.spans, lgth)),
     115        ('markers.starts', bitstream2string(markers.starts, lgth)),
     116        ('markers.follows', bitstream2string(markers.follows, lgth)),
     117        ('groups.starts', bitstream2string(groups.starts, lgth)),
     118        ('groups.follows', bitstream2string(groups.follows, lgth)),
     119        ('groups.follows_7', bitstream2string(groups.follows_7, lgth)),
     120        ('groups.follows_0', bitstream2string(groups.follows_0, lgth))])
     121
    99122if __name__ == "__main__":
    100123
     
    119142    # Demo_div2(u8data, basis_bits)
    120143    # Demo_logbase2(u8data, basis_bits)
    121     Demo_div2_logbase2(u8data, basis_bits)
     144    # Demo_div2_logbase2(u8data, basis_bits)
     145    Demo_bit_byte(u8data, basis_bits)
    122146
  • trunk/symbol_table/div2_logbase2_group_strms.py

    r2103 r2106  
    4141        follows_mask_1_8 = (follows_mask_1_2 | follows_mask_3_4 | follows_mask_5_6 | follows_mask_7_8)
    4242        # Naive Advance
    43         follows_mask_9_16 = pablo.Advance(pablo.Advance(pablo.Advance(pablo.Advance(pablo.Advance(pablo.Advance(pablo.Advance(pablo.Advance(follows_mask_1_8))))))))
    44         groups.follows_16 = follows_gt_8 & follows_mask_9_16
     43        #follows_mask_9_16 = pablo.Advance(pablo.Advance(pablo.Advance(pablo.Advance(pablo.Advance(pablo.Advance(pablo.Advance(pablo.Advance(follows_mask_1_8))))))))
     44        #groups.follows_16 = follows_gt_8 & follows_mask_9_16
    4545
    4646        # Advance 32 and Interpose
    47         #temp32 = pablo.Advance32 (follows_mask_1_8)
    48         #groups.follows_16 = follows_gt_8 & interpose32 (follows_mask_1_8, temp32, 8)
     47        temp32 = pablo.Advance32 (follows_mask_1_8)
     48        groups.follows_16 = follows_gt_8 & interpose32 (follows_mask_1_8, temp32, 8)
    4949
    5050        follows_gt_16 = follows_gt_8 &~ groups.follows_16
  • trunk/symbol_table/src/Makefile

    r2103 r2106  
    3030        $(CC) -o div2_logbase2 main.cpp $(AFLAGS) -DDIV2_LOGBASE2_STRATEGY -DBUFFER_PROFILING #-DID_SYMBOL_TABLE_TEMPLATE_HPP_DEBUG  #-DHASH_TABLE_HPP_DEBUG
    3131
    32 single: basis_bits.hpp buffer.hpp byte_pool.hpp hash_strms.hpp  hash_table.hpp ../lib/hash.hpp  group_strms.hpp  symbol_table.hpp  main.cpp  marker_strms.hpp symbol_table.hpp transpose.hpp
    33         $(CC) -o single main.cpp $(AFLAGS) -DSINGLE_STRATEGY -DBUFFER_PROFILING -DID_SYMBOL_TABLE_TEMPLATE_HPP_DEBUG  #-DHASH_TABLE_HPP_DEBUG
     32bit_byte: basis_bits.hpp buffer.hpp byte_pool.hpp hash_strms.hpp  hash_table.hpp ../lib/hash.hpp  group_strms.hpp  symbol_table.hpp  main.cpp  marker_strms.hpp symbol_table.hpp transpose.hpp
     33        $(CC) -o bit_byte main.cpp $(AFLAGS) -DBIT_BYTE_STRATEGY -DBUFFER_PROFILING #-DID_SYMBOL_TABLE_TEMPLATE_HPP_DEBUG  #-DHASH_TABLE_HPP_DEBUG
    3434
    3535
     
    5555        diff -rq --exclude=".svn" $(TEST_SRC_DIR) $(TEST_DST_DIR) || exit 0
    5656
    57 single_diff_test: single
    58         $(CC) -o single main.cpp $(AFLAGS) -DSINGLE_STRATEGY -DIDENTITY_TEST
    59         python $(TEST_SCRIPT) single -d
     57bit_byte_diff_test: bit_byte
     58        $(CC) -o bit_byte main.cpp $(AFLAGS) -DBIT_BYTE_STRATEGY -DIDENTITY_TEST
     59        python $(TEST_SCRIPT) bit_byte -d
    6060        diff -rq --exclude=".svn" $(TEST_SRC_DIR) $(TEST_DST_DIR) || exit 0
    6161
     
    7777        python $(TEST_SCRIPT) div2_logbase2 -g
    7878
    79 single_gid_test:
    80         $(CC) -o single main.cpp $(AFLAGS) -DSINGLE_STRATEGY -DGID_TEST
    81         python $(TEST_SCRIPT) single -g
     79bit_byte_gid_test:
     80        $(CC) -o bit_byte main.cpp $(AFLAGS) -DBIT_BYTE_STRATEGY -DGID_TEST
     81        python $(TEST_SCRIPT) bit_byte -g
    8282
    8383clean:
    84         rm -Rf id div2 logbase2 div2_logbase2 single $(TEST_DST_DIR)
     84        rm -Rf id div2 logbase2 div2_logbase2 bit_byte $(TEST_DST_DIR)
    8585
    8686# valgrind --tool=callgrind --callgrind-out-file=./callgrind.out ./logbase2 ../test/in/\(1_1000_10\)_\(2_1000_10\)_\(3_1000_10\)_\(4_1000_10\)_\(5_1000_10\)_\(6_1000_10\)_\(7_1000_10\)_\(8_1000_10\)_\(9_1000_10\)_\(10_1000_10\)_\(11_1000_10\)_\(12_1000_10\)_\(13_1000_10\)_\(14_1000_10\)_\(15_1000_10\)_\(16_1000_10\)_\(17_1000_10\)_\(18_1000_10\)_\(19_1000_10\)_1_1.test
  • trunk/symbol_table/src/compare_strategy.hpp

    r2103 r2106  
    22#define COMPARE_STRATEGY_HPP
    33
    4 #include "types.hpp"
     4#include "strategy_types.hpp"
    55
    66///////////////////////////////////////////////////////////////////////////////
     
    187187
    188188///////////////////////////////////////////////////////////////////////////////
    189 // Single specialized
    190 ///////////////////////////////////////////////////////////////////////////////
    191 template<> class compare_strategy_t<0,single> {
    192 public:
    193     static IDISA_ALWAYS_INLINE bool compare(uint8_t * x, uint8_t * y, const uint32_t lgth) {
    194         return overlap_compare<uint8_t>(x,y,lgth);
     189// Bit Byte specialized
     190///////////////////////////////////////////////////////////////////////////////
     191template<> class compare_strategy_t<0,bit_byte> {
     192public:
     193    static IDISA_ALWAYS_INLINE bool compare(uint8_t * x, uint8_t * y, const uint32_t lgth) {
     194        return overlap_compare<uint64_t>(x,y,lgth);
     195    }
     196};
     197
     198template<> class compare_strategy_t<7,bit_byte> {
     199public:
     200    static IDISA_ALWAYS_INLINE bool compare(uint8_t * x, uint8_t * y, const uint32_t lgth) {
     201        return mem_compare(x,y,lgth); //overlap_compare<uint8_t>(x,y,lgth);
    195202    }
    196203};
  • trunk/symbol_table/src/hash_strategy.hpp

    r2092 r2106  
    22#define HASH_STRATEGY_HPP
    33
    4 #include "types.hpp"
     4#include "strategy_types.hpp"
    55
    66#define MAX_HASH_BITS 17 // Maximum length specific hash_strategy
     
    4040
    4141///////////////////////////////////////////////////////////////////////////////
     42// Identity - Default Case - Length 0 specialized for length > 16.
     43///////////////////////////////////////////////////////////////////////////////
     44template<int SCALE_FACTOR> class hash_strategy_t<7,bit_byte,SCALE_FACTOR> {
     45public:
     46    static IDISA_ALWAYS_INLINE uint64_t hash(const uint8_t * h0, const uint8_t * h1, const int32_t idx, const uint32_t slice_bits, const uint32_t hash_bits) {
     47        return bit_compress_hash(h0, h1, idx*SCALE_FACTOR, slice_bits, hash_bits); // expect h0 as hash bit stream
     48    }
     49    static IDISA_ALWAYS_INLINE uint32_t max_hashsize() { return 1*SCALE_FACTOR; }
     50};
     51
     52template<int SCALE_FACTOR> class hash_strategy_t<0,bit_byte,SCALE_FACTOR> {
     53public:
     54    static IDISA_ALWAYS_INLINE uint64_t hash(const uint8_t * h0, const uint8_t * h1, const int32_t idx, const uint32_t slice_bits, const uint32_t hash_bits) {
     55        return bit_compress_hash(h0, h1, idx*SCALE_FACTOR, slice_bits, hash_bits); // expect h0 as hash bit stream
     56    }
     57    static IDISA_ALWAYS_INLINE uint32_t max_hashsize() { return 8; }
     58};
     59
     60///////////////////////////////////////////////////////////////////////////////
    4261// Identity - Length 1 specialized.
    4362///////////////////////////////////////////////////////////////////////////////
     
    5978
    6079///////////////////////////////////////////////////////////////////////////////
    61 // Bit Space / Byte Space - div2 == id WTE bit/byte boundardy.
     80// Bit Space / Byte Space - div2 == id wit the exception of bit/byte boundardy.
    6281///////////////////////////////////////////////////////////////////////////////
    6382template<uint32_t LGTH, int SCALE_FACTOR> class hash_strategy_t<LGTH,div2,SCALE_FACTOR>: public hash_strategy_t<LGTH,id,SCALE_FACTOR>{};
  • trunk/symbol_table/src/hash_table.hpp

    r2103 r2106  
    133133
    134134        while(NULL != crt) {
    135             if(this->compare_strategy.compare(&raw_bytes[idx], crt->raw_bytes, raw_byte_lgth)) {
     135            if((raw_byte_lgth == crt->raw_bytes_lgth)) {
     136                    if (this->compare_strategy.compare(&raw_bytes[idx], crt->raw_bytes, crt->raw_bytes_lgth)) {
    136137                                gid = crt->gid;
    137138                                return true;
     139                    }
    138140            }
    139141            prev = crt;
     
    387389
    388390        while(NULL != crt) {
    389             if(this->compare_strategy.compare(&raw_bytes[idx], crt->raw_bytes, raw_byte_lgth)) {
     391            if(raw_byte_lgth == crt->raw_bytes_lgth) {
     392                if(this->compare_strategy.compare(&raw_bytes[idx], crt->raw_bytes, crt->raw_bytes_lgth)) {
    390393                                gid = crt->gid;
    391394                                return true;
     395                }
    392396            }
    393397            prev = crt;
     
    451455
    452456///////////////////////////////////////////////////////////////////////////
    453 // single - Single Strategy
     457// bit_byte - Strategy
    454458///////////////////////////////////////////////////////////////////////////
    455459template<uint32_t LGTH, int GROUP_STRATEGY, int SCALE_FACTOR, class ALLOCATOR>
    456 class single_hash_table : public hash_table<LGTH, GROUP_STRATEGY, SCALE_FACTOR, ALLOCATOR> {};
    457 
     460class bit_byte_hash_table : public hash_table<LGTH, GROUP_STRATEGY, SCALE_FACTOR, ALLOCATOR> {};
    458461
    459462#endif // HASH_TABLE_HPP
  • trunk/symbol_table/src/main.cpp

    r2103 r2106  
    213213    // Final partial block or any carry
    214214    ///////////////////////////////////////////////////////////////////////////
    215     if (remaining > 0 || generate_markers.carryQ.CarryTest(0, 2) || 1 /*|| hash_strms_any_carry*/) {
     215    if (remaining > 0 || generate_markers.carryQ.CarryTest(0, 2) || gen_lgth_groups.carryQ.CarryTest(0, 9) /*|| hash_strms_any_carry*/) {
    216216        BitBlock EOF_mask = bitblock::srl(simd<1>::constant<1>(), convert(BLOCK_SIZE-remaining));
    217217        s2p_do_final_block((BytePack *) &raw_buffer[blk*BLOCK_SIZE], basis_bits[blk], EOF_mask);
  • trunk/symbol_table/src/strategy_types.hpp

    r2103 r2106  
    1 #ifndef TYPES_HPP
    2 #define TYPES_HPP
     1#ifndef STRATEGY_TYPES
     2#define STRATEGY_TYPES
    33
    4 enum group_strategy { id, div2, logbase2, single };
     4enum group_strategy { id, div2, logbase2, bit_byte };
    55enum scale_factor { bit=1, byte=8 };
    66
    7 #endif // TYPES_HPP
     7#endif // STRATEGY_TYPES
  • trunk/symbol_table/src/symbol_table.hpp

    r2103 r2106  
    1515
    1616
    17 #include "types.hpp"
     17#include "strategy_types.hpp"
    1818#include "buffer.hpp"
    1919#include "gid.hpp"
     
    6868};
    6969
    70 // TODO - Refactor as a single mixed symbol table class composed of Id, Div2, Log2 hash tables.
     70
    7171template<class GIDS, class ALLOCATOR>
    7272class symbol_table {
     
    7474        symbol_table()/*:hash_table_1(256)*/{}
    7575        ~symbol_table() {
    76 //      hash_table_1.print_table();
    77 //      hash_table_2.print_table();
    78 //      hash_table_3.print_table();
    79 //      hash_table_4.print_table();
    80 //      hash_table_5.print_table();
    81 //      hash_table_6.print_table();
    82 //      hash_table_7.print_table();
    83 //      hash_table_8.print_table();
    84 //      hash_table_9.print_table();
    85 //      hash_table_10.print_table();
    86 //      hash_table_11.print_table();
    87 //      hash_table_12.print_table();
    88 //      hash_table_13.print_table();
    89 //      hash_table_14.print_table();
    90 //      hash_table_15.print_table();
    91 //      hash_table_16.print_table();
    92 //      hash_table_0.print_table();
     76//      hash_table_x.print_table();
    9377#ifdef HASH_TABLE_HPP_DEBUG
    94 //      hash_table_1.print_diagnostics();
    95 //      hash_table_2.print_diagnostics();
    96 //      hash_table_3.print_diagnostics();
    97 //      hash_table_4.print_diagnostics();
    98 //      hash_table_5.print_diagnostics();
    99 //      hash_table_6.print_diagnostics();
    100 //      hash_table_7.print_diagnostics();
    101 //      hash_table_8.print_diagnostics();
    102 //      hash_table_9.print_diagnostics();
    103 //      hash_table_10.print_diagnostics();
    104 //      hash_table_11.print_diagnostics();
    105 //      hash_table_12.print_diagnostics();
    106 //      hash_table_13.print_diagnostics();
    107 //      hash_table_14.print_diagnostics();
    108 //      hash_table_15.print_diagnostics();
    109 //      hash_table_16.print_diagnostics();
    110 //      hash_table_0.print_diagnostics();
     78//      hash_table_x.print_diagnostics();
    11179#endif
    11280        }
     
    231199                            BIT_HASH_VARIABLE(logbase2,bit,16);
    232200                            BIT_HASH_VARIABLE(id,bit,0);
    233                         #elif SINGLE_STRATEGY
    234                             BIT_HASH_VARIABLE(single,bit,0);
     201                        #elif BIT_BYTE_STRATEGY
     202                            BYTE_HASH_VARIABLE(bit_byte,byte,7);
     203                            BIT_HASH_VARIABLE(bit_byte,bit,0);
    235204                        #endif
    236205
     
    240209                        #undef BIT_HASH_VARIABLE
    241210
    242 //                      #ifdef SINGLE_STRATEGY
    243 
    244 //                      if(bitblock::any(groups.follows_0)) {
    245 //                          do_block<GIDS, id_hash_table<0, id, bit, ALLOCATOR> >
    246 //                                              (blk_offset,
    247 //                                               hash_table_0,
    248 //                                               starts, &groups.follows_0,
    249 //                                               buffer,
    250 //                                               (uint8_t *)h0, (uint8_t *)h1, BLOCK_SIZE/8,
    251 //                                               gids, this->gid_factory, this->gid_data);
    252 //                      }
    253 
    254 //                      #else
    255211        }
    256212
     
    316272            logbase2_hash_table<16, logbase2, bit, ALLOCATOR> hash_table_16;
    317273            id_hash_table<0, id, bit, ALLOCATOR> hash_table_0;
    318         #elif SINGLE_STRATEGY
    319             single_hash_table<0, single, bit, ALLOCATOR> hash_table_0;
     274        #elif BIT_BYTE_STRATEGY
     275            bit_byte_hash_table<7, bit_byte, byte, ALLOCATOR> hash_table_7;
     276            bit_byte_hash_table<0, bit_byte, bit, ALLOCATOR> hash_table_0;
    320277        #else
    321             #error "Length group strategy not specified. #define {ID_STRATEGY,DIV2_STRATEGY,LOGBASE2_STRATEGY,DIV2_LOGBASE2_STRATEGY,SINGLE}."
     278            #error "Length group strategy not specified. #define {ID_STRATEGY,DIV2_STRATEGY,LOGBASE2_STRATEGY,DIV2_LOGBASE2_STRATEGY,BIT_BYTE}."
    322279        #endif
    323280
     
    361318                                        h1_base -= (h_block_size * blk_count);
    362319                        }
    363 
    364                         assert (spos >= 0);
    365320
    366321                        gid = h_table.lookup_or_insert(buffer_base, spos, lgth, h0_base, h1_base, gid_factory, gid_data); // WARNING: spos must be >= 0
  • trunk/symbol_table/symbol_table.pro

    r2103 r2106  
    6060    single_group_strms .py \
    6161    single_group_strms .py \
    62     single_group_strms.py
     62    bit_byte_group_strms.py \
     63    Compiler/workspace/bitutil.py \
     64    single_logbase2.py
    6365HEADERS += marker_strms_template.hpp \
    6466    marker_strms.hpp \
     
    9193    lib/bitblock128.hpp \
    9294    lib/bitblock.hpp \
    93     src/types.hpp \
    9495    lib/debug.hpp \
    95     lib/bitblock_align.hpp
     96    lib/bitblock_align.hpp \
     97    src/strategy_types.hpp
  • trunk/symbol_table/test/gen_test_file.py

    r2105 r2106  
    6767            sys.exit()
    6868
    69         min_lgth = 4
    70         max_lgth = 4
    71         occurences = 100
     69        min_lgth = 1
     70        max_lgth = 20
     71        occurences = 100       
    7272        unique = 10
    7373
Note: See TracChangeset for help on using the changeset viewer.