Changeset 2101 for trunk


Ignore:
Timestamp:
May 15, 2012, 2:48:51 PM (7 years ago)
Author:
ksherdy
Message:

Added mixed div2, log2 support. Updated div2 hash strategy to test prepended delimiter.

Location:
trunk/symbol_table
Files:
45 added
8 edited

Legend:

Unmodified
Added
Removed
  • trunk/symbol_table/div2_group_strms.py

    r2097 r2101  
    1717def Gen_lgth_groups(groups):
    1818
    19         shift_or_follows = (groups.follows | pablo.Advance(groups.follows))
    20         follows = groups.follows
     19        ### Div2 Strategy ###
     20        follows_gt_0 = groups.follows
     21        follows_mask_1_2 = pablo.Advance(groups.starts) | pablo.Advance(pablo.Advance(groups.starts))
    2122
    22         # Group symbols of length 1 and length 2
    23         cursor = pablo.Advance(pablo.Advance(groups.starts))
    24         groups.follows_2 = cursor & (shift_or_follows)
     23        # Groups symbols of length 1 and length 2
     24        groups.follows_2 = follows_gt_0 & follows_mask_1_2
     25        follows_gt_2 = follows_gt_0 &~ follows_mask_1_2
     26        follows_mask_3_4 = pablo.Advance(pablo.Advance(follows_mask_1_2))
    2527
    26         mask = pablo.Advance(groups.starts) | pablo.Advance(pablo.Advance(groups.starts))
    27         follows = follows &~ mask
     28        # Groups symbols of length 3 and length 4
     29        groups.follows_4 = follows_gt_2 & follows_mask_3_4
     30        follows_gt_4 = follows_gt_2 &~ follows_mask_3_4
     31        follows_mask_5_6 = pablo.Advance(pablo.Advance(follows_mask_3_4))
    2832
    29         # Group symbols of length 3 and length 4
    30         cursor = pablo.Advance(pablo.Advance(cursor &~ shift_or_follows))
    31         groups.follows_4 = cursor & (shift_or_follows)
     33        # Groups symbols of length 5 and length 6
     34        groups.follows_6 = follows_gt_4 & follows_mask_5_6
     35        follows_gt_6 = follows_gt_4 &~ follows_mask_5_6
     36        follows_mask_7_8 = pablo.Advance(pablo.Advance(follows_mask_5_6))
    3237
    33         mask = pablo.Advance(pablo.Advance(mask))
    34         follows = follows &~ mask
     38        # Rinse and repeat
     39        groups.follows_8 = follows_gt_6 & follows_mask_7_8
     40        follows_gt_8 = follows_gt_6 &~ follows_mask_7_8
     41        follows_mask_9_10 = pablo.Advance(pablo.Advance(follows_mask_7_8))
    3542
    36         ## Group symbols of length 5 and length 6
    37         cursor = pablo.Advance(pablo.Advance(cursor &~ shift_or_follows))
    38         groups.follows_6 = cursor & (shift_or_follows)
     43        groups.follows_10 = follows_gt_8 & follows_mask_9_10
     44        follows_gt_10 = follows_gt_8 &~ follows_mask_9_10
     45        follows_mask_11_12 = pablo.Advance(pablo.Advance(follows_mask_9_10))
    3946
    40         mask = pablo.Advance(pablo.Advance(mask))
    41         follows = follows &~ mask
     47        groups.follows_12 = follows_gt_10 & follows_mask_11_12
     48        follows_gt_12 = follows_gt_10 &~ follows_mask_11_12
     49        follows_mask_13_14 = pablo.Advance(pablo.Advance(follows_mask_11_12))
    4250
    43         ## Group symbols of length 7 and length 8
    44         cursor = pablo.Advance(pablo.Advance(cursor &~ shift_or_follows))
    45         groups.follows_8 = cursor & (shift_or_follows)
     51        groups.follows_14 = follows_gt_12 & follows_mask_13_14
     52        follows_gt_14 = follows_gt_12 &~ follows_mask_13_14
     53        follows_mask_15_16 = pablo.Advance(pablo.Advance(follows_mask_13_14))
    4654
    47         mask = pablo.Advance(pablo.Advance(mask))
    48         follows = follows &~ mask
     55        groups.follows_16 = follows_gt_14 & follows_mask_15_16
     56        follows_gt_16 = follows_gt_14 &~ follows_mask_15_16
    4957
    50         ## Group symbols of length 9 and length 10
    51         cursor = pablo.Advance(pablo.Advance(cursor &~ shift_or_follows))
    52         groups.follows_10 = cursor & (shift_or_follows)
    53 
    54         mask = pablo.Advance(pablo.Advance(mask))
    55         follows = follows &~ mask
    56 
    57         ## Group symbols of length 11 and length 12
    58         cursor = pablo.Advance(pablo.Advance(cursor &~ shift_or_follows))
    59         groups.follows_12 = cursor & (shift_or_follows)
    60 
    61         mask = pablo.Advance(pablo.Advance(mask))
    62         follows = follows &~ mask
    63 
    64         ## Group symbols of length 13 and length 14
    65         cursor = pablo.Advance(pablo.Advance(cursor &~ shift_or_follows))
    66         groups.follows_14 = cursor & (shift_or_follows)
    67 
    68         mask = pablo.Advance(pablo.Advance(mask))
    69         follows = follows &~ mask
    70 
    71         ## Group symbols of length 15 and length 16
    72         cursor = pablo.Advance(pablo.Advance(cursor &~ shift_or_follows))
    73         groups.follows_16 = cursor & (shift_or_follows)
    74 
    75         mask = pablo.Advance(pablo.Advance(mask))
    76         follows = follows &~ mask
    77 
    78         ## Group symbols of length greater than equal to 17
    79         groups.follows_gte_17 = follows
     58        groups.follows_gte_17 = follows_gt_16
    8059
    8160def Main(groups):
  • trunk/symbol_table/div2_logbase2_group_strms.py

    r2098 r2101  
    44        starts = 0
    55        follows = 0
    6         follows_1 = 0
    76        follows_2 = 0
    8         follows_3 = 0
    97        follows_4 = 0
    10         follows_5 = 0
    118        follows_6 = 0
    12         follows_7 = 0
    139        follows_8 = 0
    1410        follows_16 = 0
     
    1713def Gen_lgth_groups(groups):
    1814
    19         shift_or_follows = (groups.follows | pablo.Advance(groups.follows))
    20         follows = groups.follows
     15        ### Div2 Strategy ###
     16        follows_gt_0 = groups.follows
     17        follows_mask_1_2 = pablo.Advance(groups.starts) | pablo.Advance(pablo.Advance(groups.starts))
    2118
    22         # Group symbols of length 1 and length 2
    23         cursor = pablo.Advance(pablo.Advance(groups.starts))
    24         groups.follows_2 = cursor & (shift_or_follows)
     19        # Groups symbols of length 1 and length 2
     20        groups.follows_2 = follows_gt_0 & follows_mask_1_2
     21        follows_gt_2 = follows_gt_0 &~ follows_mask_1_2
     22        follows_mask_3_4 = pablo.Advance(pablo.Advance(follows_mask_1_2))
    2523
    26         mask = pablo.Advance(groups.starts) | pablo.Advance(pablo.Advance(groups.starts))
    27         follows = follows &~ mask
     24        # Groups symbols of length 3 and length 4
     25        groups.follows_4 = follows_gt_2 & follows_mask_3_4
     26        follows_gt_4 = follows_gt_2 &~ follows_mask_3_4
     27        follows_mask_5_6 = pablo.Advance(pablo.Advance(follows_mask_3_4))
    2828
    29         # Group symbols of length 3 and length 4
    30         cursor = pablo.Advance(pablo.Advance(cursor &~ shift_or_follows))
    31         groups.follows_4 = cursor & (shift_or_follows)
     29        # Groups symbols of length 5 and length 6
     30        groups.follows_6 = follows_gt_4 & follows_mask_5_6
     31        follows_gt_6 = follows_gt_4 &~ follows_mask_5_6
     32        follows_mask_7_8 = pablo.Advance(pablo.Advance(follows_mask_5_6))
    3233
    33         mask = pablo.Advance(pablo.Advance(mask))
    34         follows = follows &~ mask
     34        # Groups symbols of length 7 and length 8
     35        groups.follows_8 = follows_gt_6 & follows_mask_7_8
     36        follows_gt_8 = follows_gt_6 &~ follows_mask_7_8
    3537
    36         ## Group symbols of length 5 and length 6
    37         cursor = pablo.Advance(pablo.Advance(cursor &~ shift_or_follows))
    38         groups.follows_6 = cursor & (shift_or_follows)
     38        ### Log2 Strategy ###
     39        follows_mask_1_8 = follows_mask_1_2 | follows_mask_3_4 | follows_mask_5_6 | follows_mask_7_8
     40        # Naive Advance
     41        follows_mask_9_16 = pablo.Advance(pablo.Advance(pablo.Advance(pablo.Advance(pablo.Advance(pablo.Advance(pablo.Advance(pablo.Advance(follows_mask_1_8))))))))
    3942
    40         mask = pablo.Advance(pablo.Advance(mask))
    41         follows = follows &~ mask
     43        # Groups symbols of length 9 to length 16
     44        groups.follows_16 = follows_gt_8 & follows_mask_9_16
    4245
    43         ## Group symbols of length 7 and length 8
    44         cursor = pablo.Advance(pablo.Advance(cursor &~ shift_or_follows))
    45         groups.follows_8 = cursor & (shift_or_follows)
     46        # Advance 32 and Interpose
     47        # temp32 = pablo.Advance32 (follows_mask_1_8)
     48        # groups.follows_16 = interpose32 (temp, temp32, 8)
    4649
    47         mask = pablo.Advance(pablo.Advance(mask))
    48         follows = follows &~ mask
    49         cursor = cursor &~ shift_or_follows
     50        follows_gt_16 = follows_gt_8 &~ follows_mask_9_16
    5051
    51         ## Group symbols of length 9 to legth 16
    52         cursor9 = pablo.Advance(cursor)
    53         cursor10 = pablo.Advance(cursor9)
    54         cursor11_12 = pablo.Advance(pablo.Advance(cursor9|cursor10))
    55         cursor13_16 = pablo.Advance(pablo.Advance(pablo.Advance(pablo.Advance(cursor9|cursor10|cursor11_12))))
    56         cursor9_16 = cursor9 | cursor10 | cursor11_12 | cursor13_16
    57         groups.follows_16 = cursor9_16 & groups.follows
    58 
    59         # Group symbols of length 17 and longer
    60         follows = follows &~ groups.follows_16
    61         groups.follows_gte_17 = follows
     52        # Groups symbols of length greater than 16
     53        groups.follows_gte_17 = follows_gt_16
    6254
    6355def Main(groups):
  • trunk/symbol_table/main_template.cpp

    r2098 r2101  
    132132    uint32_t chars_avail = is.gcount();
    133133
     134    #if IDENTITY_TEST
     135        cout << ","; // prepend delimeter
     136    #endif
     137
    134138    ///////////////////////////////////////////////////////////////////////////
    135139    // Full Segments
     
    171175    // test
    172176    #ifdef IDENTITY_TEST
     177
     178    cout << "," << endl; // prepend delimeter
     179
    173180    uint32_t blk_offset;
    174181    for(int blk=0;blk<SEGMENT_BLOCKS;blk++) {
  • trunk/symbol_table/src/Makefile

    r2098 r2101  
    1414
    1515TEST_SCRIPT = ../test/run_tests.py
    16 TEST_SRC_DIR = ../test/in
    17 TEST_DST_DIR = ../test/out
     16TEST_SRC_DIR = ../test/pin
     17TEST_DST_DIR = ../test/pout
    1818
    1919# Perf builds
  • trunk/symbol_table/src/hash_table.hpp

    r2098 r2101  
    416416        } else {
    417417
    418             if (!div2_hash_table::is_delimeter((char) *(raw_bytes + (idx + lgth-1)))) { // TODO - update on pablo to check at pos == idx
    419 
     418            if (!div2_hash_table::is_delimeter((char) *(raw_bytes + (idx)))) {
    420419                this->pool_and_insert(bucket, raw_bytes, idx, lgth, h0, h1, gid_factory, gid_data, gid);
    421420                return gid;
     
    424423                // Odd
    425424                ///////////////////////////////////////////////////////////////////////////
    426                 bucket = this->hash_table_odd.get_bucket(h0,h1,idx);
    427                 lgth = lgth - 1;
    428 
    429                 if(this->hash_table_odd.lookup(bucket, raw_bytes, idx, lgth, h0, h1, gid)) {
     425                bucket = this->hash_table_odd.get_bucket(h0,h1,idx+1);
     426
     427                if(this->hash_table_odd.lookup(bucket, raw_bytes, idx+1, lgth-1, h0, h1, gid)) {
    430428                    return gid;
    431429                }
    432430
    433                 hash_table_odd.pool_and_insert(bucket, raw_bytes, idx, lgth, h0, h1, gid_factory, gid_data, gid);
     431                hash_table_odd.pool_and_insert(bucket, raw_bytes, idx+1, lgth-1, h0, h1, gid_factory, gid_data, gid);
    434432                return gid;
    435433            }
  • trunk/symbol_table/src/symbol_table.hpp

    r2098 r2101  
    2929static void print_symbol_debug(gid_type gid, const uint8_t buffer [], const int32_t spos, const uint32_t epos, const uint32_t lgth) {
    3030        cout << "{Symbol:{";
    31         cout << "GID:'" << gid;
    32         cout << "',Length:" << lgth;
    33         cout << "',Value:'" << string((char *)&(buffer[spos]), lgth) << "'";
    34         cout << "',Start':" << spos;
    35         cout << "',End:" << epos;
     31        cout << "GID:" << gid;
     32        cout << ",Length:" << lgth;
     33        cout << ",Value:'" << string((char *)&(buffer[spos]), lgth) << "'";
     34        cout << ",Start:" << spos;
     35        cout << ",Follows:" << epos;
    3636        cout << "}}" << endl;
    3737}
     
    333333
    334334                gid_type gid;
    335                 int32_t epos;
     335                int32_t fpos;
    336336                int32_t spos;
    337337                uint32_t blk_count;
     
    340340
    341341        rscanner.scan_to_next();
    342         epos = rscanner.get_pos();
     342        fpos = rscanner.get_pos();
    343343
    344344                while(!rscanner.is_done()) {
    345345
    346                 spos = epos - lgth;
     346                spos = fpos - lgth;
    347347
    348348                        if(spos < 0) { // boundary case
     
    358358                        gid = h_table.lookup_or_insert(buffer_base, spos, lgth, h0_base, h1_base, gid_factory, gid_data); // WARNING: spos must be >= 0
    359359
    360                         #ifdef ID_SYMBOL_STORE_SYMBOL_GIDS_AT_END_POSITION
     360                        #ifdef ID_SYMBOL_STORE_SYMBOL_GIDS_AT_FOLLOW_POSITION
    361361                        gids.at[blk_offset + epos] = gid;
    362362                        #else
    363                         gids.at[blk_offset + epos - lgth] = gid;
     363                        gids.at[blk_offset + fpos - gid_data.get_bytes_lgth(gid)] = gid;
    364364                        #endif
    365365
    366366                        #ifdef ID_SYMBOL_TABLE_TEMPLATE_HPP_DEBUG
    367                                 print_symbol_debug(gid, buffer_base, spos, epos, gid_data.get_bytes_lgth(gid));
     367                                print_symbol_debug(gid, buffer_base, spos, fpos, gid_data.get_bytes_lgth(gid));
    368368                        #endif
    369369
    370370                        rscanner.scan_to_next();
    371                         epos = rscanner.get_pos();
     371                        fpos = rscanner.get_pos();
    372372                }
    373373        }
     
    390390
    391391        gid_type gid;
    392         int32_t epos;
     392        int32_t fpos;
    393393        int32_t spos;
    394394        uint32_t lgth;
     
    399399
    400400        follows_rscanner.scan_to_next();
    401         epos = follows_rscanner.get_pos();
     401        fpos = follows_rscanner.get_pos();
    402402
    403403        while(!follows_rscanner.is_done()) {
    404404
    405                 starts_rscanner.move_to(epos);
     405                starts_rscanner.move_to(fpos);
    406406                starts_rscanner.scan_to_next();
    407407                spos = starts_rscanner.get_pos();
    408                 lgth = epos - spos;
     408                lgth = fpos - spos;
    409409
    410410                while(starts_rscanner.is_done()) { // boundary case
     
    417417
    418418                        if(!starts_rscanner.is_done()) { // found start
    419                                         lgth = epos + (BLOCK_SIZE - starts_rscanner.get_pos()) + (BLOCK_SIZE * (blk_count-1));
     419                                        lgth = fpos + (BLOCK_SIZE - starts_rscanner.get_pos()) + (BLOCK_SIZE * (blk_count-1));
    420420                                        spos = starts_rscanner.get_pos();
    421421                                        buffer_base -= (BLOCK_SIZE * blk_count);
     
    432432                gids.at[blk_offset + epos] = gid;
    433433                #else
    434                 gids.at[blk_offset + epos - lgth] = gid;
     434                gids.at[blk_offset + fpos - lgth] = gid;
    435435                #endif
    436436
    437437                #ifdef ID_SYMBOL_TABLE_TEMPLATE_HPP_DEBUG
    438                         //print_symbol_debug(gid, buffer, spos, epos, lgth);
    439                         print_symbol_debug(gid, buffer_base, spos, epos, lgth);
     438                        //print_symbol_debug(gid, buffer, spos, fpos, lgth);
     439                        print_symbol_debug(gid, buffer_base, spos, fpos, gid_data.get_bytes_lgth(gid));
    440440                #endif
    441441
    442442                follows_rscanner.scan_to_next();
    443                 epos = follows_rscanner.get_pos();
     443                fpos = follows_rscanner.get_pos();
    444444        }
    445445}
  • trunk/symbol_table/symbol_table.pro

    r2098 r2101  
    1414    libtest/byte_compare_template.cpp \
    1515    libtest/byte_compare_generator.cpp \
    16     libtest/bitscantest.cpp
     16    libtest/bitscantest.cpp \
     17    src/main.cpp
    1718OTHER_FILES += marker_strms.py \
    1819    Makefile \
  • trunk/symbol_table/test/run_tests.py

    r2094 r2101  
    1111# Edit:
    1212program_dir_path='../src'
    13 test_dir_path='../test/in'
    14 out_dir_path='../test/out'
     13test_dir_path='../test/pin'
     14out_dir_path='../test/pout'
    1515#----------------------------------------------------------------------------
    1616import sys
Note: See TracChangeset for help on using the changeset viewer.