Changeset 1387


Ignore:
Timestamp:
Aug 26, 2011, 11:42:51 AM (8 years ago)
Author:
vla24
Message:

Symbol Table: Implemented division by 2 grouping strategy

Files:
3 added
7 edited

Legend:

Unmodified
Added
Removed
  • proto/SymbolTable/parabix2_symtab_pbs_div.py

    r1232 r1387  
    102102        HexRef_ends = 0
    103103
     104class Hash_data():
     105        Hash_value = 0
     106
    104107class Tag_Callouts():
    105108        ElemName_starts = 0
    106109        ElemName_ends = 0
    107         ElemName_ends_1 = 0
    108         ElemName_ends_2 = 0
    109         ElemName_ends_3 = 0
    110         ElemName_ends_4 = 0
    111         ElemName_ends_5 = 0
    112         ElemName_ends_6 = 0
    113         ElemName_ends_7 = 0
    114         ElemName_ends_8 = 0
    115         ElemName_ends_9 = 0
    116         ElemName_ends_10 = 0
    117         ElemName_ends_11 = 0
    118         ElemName_ends_12 = 0
    119         ElemName_ends_13 = 0
    120         ElemName_ends_14 = 0
    121         ElemName_ends_15 = 0
    122         ElemName_ends_16 = 0
    123         ElemName_ends_17_and_longer = 0
     110        ElemName_ends_1_to_2 = 0
     111        ElemName_ends_3_to_4 = 0
     112        ElemName_ends_5_to_6 = 0
     113        ElemName_ends_7_to_8 = 0
     114        ElemName_ends_9_to_10 = 0
     115        ElemName_ends_11_to_12 = 0
     116        ElemName_ends_13_to_14 = 0
     117        ElemName_ends_15_to_16 = 0
     118        ElemName_remaining_ends = 0
    124119        AttName_starts = 0
    125120        AttName_ends = 0
     
    568563def Form_Length_Group_Bitstreams(tag_Callouts):
    569564
    570     remaining_starts = tag_Callouts.ElemName_starts
    571     remaining_ends = tag_Callouts.ElemName_ends
    572     temp = tag_Callouts.ElemName_starts
    573     temp32 = bitutil.Advance32(temp)
    574 
    575     # Group symbols of length 1
    576     tag_Callouts.ElemName_ends_1 = interpose32(temp, temp32, 1) & remaining_ends
    577     remaining_ends = remaining_ends & ~tag_Callouts.ElemName_ends_1
    578 
    579     # Group symbols of length 2
    580     tag_Callouts.ElemName_ends_2 = interpose32(temp, temp32, 2) & remaining_ends
    581     remaining_ends = remaining_ends & ~tag_Callouts.ElemName_ends_2
    582 
    583     # Group symbols of length 3
    584     tag_Callouts.ElemName_ends_3 = interpose32(temp, temp32, 3) & remaining_ends
    585     remaining_ends = remaining_ends & ~tag_Callouts.ElemName_ends_3
    586 
    587     # Group symbols of length 4
    588     tag_Callouts.ElemName_ends_4 = interpose32(temp, temp32, 4) & remaining_ends
    589     remaining_ends = remaining_ends & ~tag_Callouts.ElemName_ends_4
    590 
    591     # Group symbols of length 5
    592     tag_Callouts.ElemName_ends_5 = interpose32(temp, temp32, 5) & remaining_ends
    593     remaining_ends = remaining_ends & ~tag_Callouts.ElemName_ends_5
    594 
    595     # Group symbols of length 6
    596     tag_Callouts.ElemName_ends_6 = interpose32(temp, temp32, 6) & remaining_ends
    597     remaining_ends = remaining_ends & ~tag_Callouts.ElemName_ends_6
    598 
    599     # Group symbols of length 7
    600     tag_Callouts.ElemName_ends_7 = interpose32(temp, temp32, 7) & remaining_ends
    601     remaining_ends = remaining_ends & ~tag_Callouts.ElemName_ends_7
    602 
    603     # Group symbols of length 8
    604     tag_Callouts.ElemName_ends_8 = interpose32(temp, temp32, 8) & remaining_ends
    605     remaining_ends = remaining_ends & ~tag_Callouts.ElemName_ends_8
    606 
    607     # Group symbols of length 9
    608     tag_Callouts.ElemName_ends_9 = interpose32(temp, temp32, 9) & remaining_ends
    609     remaining_ends = remaining_ends & ~tag_Callouts.ElemName_ends_9
    610 
    611     # Group symbols of length 10
    612     tag_Callouts.ElemName_ends_10 = interpose32(temp, temp32, 10) & remaining_ends
    613     remaining_ends = remaining_ends & ~tag_Callouts.ElemName_ends_10
    614 
    615     # Group symbols of length 11
    616     tag_Callouts.ElemName_ends_11 = interpose32(temp, temp32, 11) & remaining_ends
    617     remaining_ends = remaining_ends & ~tag_Callouts.ElemName_ends_11
    618 
    619     # Group symbols of length 12
    620     tag_Callouts.ElemName_ends_12 = interpose32(temp, temp32, 12) & remaining_ends
    621     remaining_ends = remaining_ends & ~tag_Callouts.ElemName_ends_12
    622 
    623     # Group symbols of length 13
    624     tag_Callouts.ElemName_ends_13 = interpose32(temp, temp32, 13) & remaining_ends
    625     remaining_ends = remaining_ends & ~tag_Callouts.ElemName_ends_13
    626 
    627     # Group symbols of length 14
    628     tag_Callouts.ElemName_ends_14 = interpose32(temp, temp32, 14) & remaining_ends
    629     remaining_ends = remaining_ends & ~tag_Callouts.ElemName_ends_14
    630 
    631     # Group symbols of length 15
    632     temp15 = interpose32(temp, temp32, 15)
    633     tag_Callouts.ElemName_ends_15 = temp15 & remaining_ends
    634     remaining_ends = remaining_ends & ~tag_Callouts.ElemName_ends_15
    635 
    636     # Group symbols of length 16
    637     temp = bitutil.Advance(temp15)
    638     tag_Callouts.ElemName_ends_16 = temp & remaining_ends
    639     remaining_ends = remaining_ends & ~tag_Callouts.ElemName_ends_16
     565    starts = tag_Callouts.ElemName_starts
     566    ends = tag_Callouts.ElemName_ends
     567
     568    temp = ends | bitutil.Advance(ends)
     569
     570    # Group symbols of length 1 and 2
     571    start_2 = bitutil.Advance(bitutil.Advance(starts))
     572    tag_Callouts.ElemName_ends_1_to_2 =  start_2 & temp
     573
     574    # Group symbols of length 3 and 4
     575    start_4 = bitutil.Advance(bitutil.Advance(start_2 & ~temp))
     576    tag_Callouts.ElemName_ends_3_to_4 =  start_4 & temp
     577
     578    # Group symbols of length 5 and 6
     579    start_6 = bitutil.Advance(bitutil.Advance(start_4 & ~temp))
     580    tag_Callouts.ElemName_ends_5_to_6 =  start_6 & temp
     581
     582    # Group symbols of length 7 and 8
     583    start_8 = bitutil.Advance(bitutil.Advance(start_6 & ~temp))
     584    tag_Callouts.ElemName_ends_7_to_8 =  start_8 & temp
     585
     586    # Group symbols of length 9 and 10
     587    start_10 = bitutil.Advance(bitutil.Advance(start_8 & ~temp))
     588    tag_Callouts.ElemName_ends_9_to_10 =  start_10 & temp
     589
     590    # Group symbols of length 11 and 12
     591    start_12 = bitutil.Advance(bitutil.Advance(start_10 & ~temp))
     592    tag_Callouts.ElemName_ends_11_to_12 =  start_12 & temp
     593
     594    # Group symbols of length 13 and 14
     595    start_14 = bitutil.Advance(bitutil.Advance(start_12 & ~temp))
     596    tag_Callouts.ElemName_ends_13_to_14 =  start_14 & temp
     597
     598    # Group symbols of length 15 and 16
     599    start_16 = bitutil.Advance(bitutil.Advance(start_14 & ~temp))
     600    tag_Callouts.ElemName_ends_15_to_16 =  start_16 & temp
    640601
    641602    # Group symbols of length 17 and longer
    642     tag_Callouts.ElemName_ends_17_and_longer = remaining_ends
     603    tag_Callouts.ElemName_remaining_ends = start_16 & ~tag_Callouts.ElemName_ends_15_to_16
    643604
    644605
  • proto/SymbolTable/symtab_pbgs_div_template.cpp

    r1231 r1387  
    5555char * source;
    5656LineColTracker tracker;
     57
     58static inline void ReportError(const char * error_msg, int error_pos_in_block) {
     59  int error_line, error_column;
     60  tracker.get_Line_and_Column(error_pos_in_block, error_line, error_column);
     61  fprintf(stderr, "%s at line %i, column %i\n", error_msg, error_line, error_column);
     62}
     63
     64class ErrorTracker {
     65public:
     66    ErrorTracker() { noted_pos_in_block = -1;}
     67
     68    inline void NoteError(const char * error_msg, BitBlock err_strm) {
     69      int pos_in_block = count_forward_zeroes(err_strm);
     70      if ((noted_pos_in_block == -1) || (noted_pos_in_block > pos_in_block)) {
     71        noted_pos_in_block = pos_in_block;
     72        noted_error = error_msg;
     73      }
     74    }
     75
     76    inline void If_Error_Report_First() {
     77      if (noted_pos_in_block > -1) {
     78              int error_line, error_column;
     79              ReportError(noted_error, noted_pos_in_block);
     80              exit(-1);
     81      }
     82    }
     83
     84private:
     85  const char * noted_error;
     86  int noted_pos_in_block;
     87};
     88
     89
    5790TagMatcher matcher;
    5891BitBlock EOF_mask = simd_const_1(1);
    59 BitBlock elem_starts;
    60 int previous_block_last_elem_start;
     92
     93ErrorTracker error_tracker;
     94BitBlock elem_ends;
     95int last_elem_start;
     96bool block_boundary_case = false;
    6197BytePack hashvalues[2];
    6298
     
    65101
    66102
    67 static inline int ScanBackwardPos(BitBlock * block, int pos);
     103static inline int ScanForwardPos(BitBlock * block, int pos);
    68104static inline int compute_hash_value (int lgth, int start);
     105static inline int ElemStart_grouping(int start_pos, int lgth); // lgth > 16
    69106template <int L> static inline int ElemEnd_grouping(int pos, int length);
    70107template <int L> static inline int StreamScanLengthGrouping(ScanBlock * stream, int blk_count);
    71108
    72 static inline int compute_hash_value (int lgth, int start)
    73 {
    74     unsigned int offset_bit = start + 128;
    75     uint64_t stream = *((uint64_t*)(((uint32_t*)hashvalues)+(offset_bit>>5)));
    76     return stream >> (offset_bit & 0x1F) & ~(~0 << lgth);
    77 }
    78 
    79 static inline int ScanBackwardPos(BitBlock * block, int pos)
     109
     110static inline int ScanForwardPos(BitBlock * block, int pos)
    80111{
    81112    BitBlock s = block[0];
    82     BitBlock temp = simd_and( s, simd_not(simd<128>::sll(simd<1>::constant<1>(), sisd_from_int(pos))) );
     113    BitBlock temp = simd_and(s, simd<128>::sll(simd<1>::constant<1>(), sisd_from_int(pos)));
     114
    83115    if (bitblock_has_bit(temp))
    84116    {
    85         // sizeof (BitBlock)*8 - cbzl( s & ~(~0 << pos)) - 1;
    86         return sizeof(BitBlock)*8 - count_backward_zeroes (temp) - 1;
     117#if DEBUG
     118        printf ("There is a 1 bit in the block\n");
     119#endif
     120        return count_forward_zeroes (temp);
    87121    }
    88122    else
    89123    {
     124#if DEBUG
     125        printf ("There is no more 1 bit in the block. pos: %i\n", pos);
     126#endif
    90127        //handle boundary case
     128        block_boundary_case = true;
     129        last_elem_start = pos - BLOCK_SIZE;
    91130#if DEBUG
    92         printf ("%s | block boundary case, return %i\n", __FUNCTION__, previous_block_last_elem_start - 1);
    93 #endif
    94         return previous_block_last_elem_start - 1;
     131        printf ("last_elem_start: %i\n", last_elem_start);
     132#endif
     133        return 0;
    95134    }
    96135}
     
    111150}
    112151
    113 
    114 template <int L> static inline int ElemEnd_grouping(int pos, int length) {
    115     return 0;
    116 }
    117 
    118 // length in [1,4]
    119 template <>
    120 inline int ElemEnd_grouping<4>(int pos, int L) {
    121     int start = pos + block_base;
    122     int hashvalue = compute_hash_value(L, pos);
    123     int gid = 0;
    124     switch (L)
    125     {
    126     case 1:
    127         gid = pbgs_symbol_table.Lookup_or_Insert_Name<1>(source + start, hashvalue);
    128         break;
    129     case 2:
    130         gid = pbgs_symbol_table.Lookup_or_Insert_Name<2>(source + start, hashvalue);
    131         break;
    132     case 3:
    133         gid = pbgs_symbol_table.Lookup_or_Insert_Name<3>(source + start, hashvalue);
    134         break;
    135     case 4:
    136         gid = pbgs_symbol_table.Lookup_or_Insert_Name<4>(source + start, hashvalue);
    137         break;
    138     }
    139 
     152static inline int compute_hash_value (int lgth, int start)
     153{
     154    unsigned int offset_bit = start + 128;
     155    uint64_t stream = *((uint64_t*)(((uint32_t*)hashvalues)+(offset_bit>>5)));
     156    return stream >> (offset_bit & 0x1F) & ~(~0 << lgth);
     157}
     158
     159// length in [1,16]
     160template <int L>
     161static inline int ElemEnd_grouping(int end_pos) {
     162    int end = block_base + end_pos;
     163    int start = end - L;
     164    int hashvalue = compute_hash_value(L, start - block_base);
     165    int gid = pbgs_symbol_table.Lookup_or_Insert_Name<L>(source + start, hashvalue);
    140166    gids.push_back(gid);
    141167#if DEBUG
    142     int end = start + L;
    143168    char* symbol = new char[L+1];
    144169    strncpy ( symbol, source + start, L );
    145170    symbol[L] ='\0';
    146     printf ("%s | start: %i[%i] | end: %i[%i] | gid: %i | hashvalue: %i | symbol: %s\n", __FUNCTION__, start, start-buffer_base, end, end-buffer_base, gid, hashvalue, symbol );
     171    printf ("%s | start: %i[%i] | end: %i[%i] | lgth: %i | gid: %i | hashvalue: %i | symbol: %s\n", __FUNCTION__, start, start-buffer_base, end, end-buffer_base, L, gid, hashvalue, symbol );
    147172    delete symbol; symbol = 0;
    148173#endif
     
    150175}
    151176
    152 // length in [5,8]
    153 template <>
    154 inline int ElemEnd_grouping<8>(int pos, int L) {
    155     int start = pos + block_base;
    156     int hashvalue = compute_hash_value(L, pos);
    157     int gid = 0;
    158     switch (L)
    159     {
    160     case 5:
    161         gid = pbgs_symbol_table.Lookup_or_Insert_Name<5>(source + start, hashvalue);
    162         break;
    163     case 6:
    164         gid = pbgs_symbol_table.Lookup_or_Insert_Name<6>(source + start, hashvalue);
    165         break;
    166     case 7:
    167         gid = pbgs_symbol_table.Lookup_or_Insert_Name<7>(source + start, hashvalue);
    168         break;
    169     case 8:
    170         gid = pbgs_symbol_table.Lookup_or_Insert_Name<8>(source + start, hashvalue);
    171         break;
    172     }
    173 
    174     gids.push_back(gid);
    175 #if DEBUG
    176     int end = start + L;
    177     char* symbol = new char[L+1];
    178     strncpy ( symbol, source + start, L );
    179     symbol[L] ='\0';
    180     printf ("%s | start: %i[%i] | end: %i[%i] | gid: %i | hashvalue: %i | symbol: %s\n", __FUNCTION__, start, start-buffer_base, end, end-buffer_base, gid, hashvalue, symbol );
    181     delete symbol; symbol = 0;
    182 #endif
    183     return 0;
    184 }
    185 
    186 // length in [9,12]
    187 template <>
    188 inline int ElemEnd_grouping<12>(int pos, int L) {
    189     int start = pos + block_base;
    190     int hashvalue = compute_hash_value(L, pos);
    191     int gid = 0;
    192     switch (L)
    193     {
    194     case 9:
    195         gid = pbgs_symbol_table.Lookup_or_Insert_Name<9>(source + start, hashvalue);
    196         break;
    197     case 10:
    198         gid = pbgs_symbol_table.Lookup_or_Insert_Name<10>(source + start, hashvalue);
    199         break;
    200     case 11:
    201         gid = pbgs_symbol_table.Lookup_or_Insert_Name<11>(source + start, hashvalue);
    202         break;
    203     case 12:
    204         gid = pbgs_symbol_table.Lookup_or_Insert_Name<12>(source + start, hashvalue);
    205         break;
    206     }
    207 
    208     gids.push_back(gid);
    209 #if DEBUG
    210     int end = start + L;
    211     char* symbol = new char[L+1];
    212     strncpy ( symbol, source + start, L );
    213     symbol[L] ='\0';
    214     printf ("%s | start: %i[%i] | end: %i[%i] | gid: %i | hashvalue: %i | symbol: %s\n", __FUNCTION__, start, start-buffer_base, end, end-buffer_base, gid, hashvalue, symbol );
    215     delete symbol; symbol = 0;
    216 #endif
    217     return 0;
    218 }
    219 
    220 // length in [13,16]
    221 template <>
    222 inline int ElemEnd_grouping<16>(int pos, int L) {
    223     int start = pos + block_base;
    224     int hashvalue = compute_hash_value(L, pos);
    225     int gid = 0;
    226     switch (L)
    227     {
    228     case 13:
    229         gid = pbgs_symbol_table.Lookup_or_Insert_Name<13>(source + start, hashvalue);
    230         break;
    231     case 14:
    232         gid = pbgs_symbol_table.Lookup_or_Insert_Name<14>(source + start, hashvalue);
    233         break;
    234     case 15:
    235         gid = pbgs_symbol_table.Lookup_or_Insert_Name<15>(source + start, hashvalue);
    236         break;
    237     case 16:
    238         gid = pbgs_symbol_table.Lookup_or_Insert_Name<16>(source + start, hashvalue);
    239         break;
    240     }
    241 
    242     gids.push_back(gid);
    243 #if DEBUG
    244     int end = start + L;
    245     char* symbol = new char[L+1];
    246     strncpy ( symbol, source + start, L );
    247     symbol[L] ='\0';
    248     printf ("%s | start: %i[%i] | end: %i[%i] | gid: %i | hashvalue: %i | symbol: %s\n", __FUNCTION__, start, start-buffer_base, end, end-buffer_base, gid, hashvalue, symbol );
    249     delete symbol; symbol = 0;
    250 #endif
    251     return 0;
    252 }
    253 
    254177// length > 16
    255 template<>
    256 inline int ElemEnd_grouping<17>(int pos) {
    257     int end = block_base + pos;
    258     int start = ScanBackwardPos (&elem_starts, pos) + block_base;
    259     int lgth = end - start;
     178static inline int ElemStart_grouping(int start_pos, int lgth) {
     179    int start = start_pos + block_base;
    260180    int hashvalue = compute_hash_value(lgth, start - block_base);
    261     int gid = 0;
    262 
    263     switch (lgth)
    264     {
    265     case 17:
    266         gid = pbgs_symbol_table.Lookup_or_Insert_Name<17>(source + start, hashvalue);
    267         break;
    268     case 18:
    269         gid = pbgs_symbol_table.Lookup_or_Insert_Name<18>(source + start, hashvalue);
    270         break;
    271     case 19:
    272         gid = pbgs_symbol_table.Lookup_or_Insert_Name<19>(source + start, hashvalue);
    273         break;
    274     case 20:
    275         gid = pbgs_symbol_table.Lookup_or_Insert_Name<20>(source + start, hashvalue);
    276         break;
    277     case 21:
    278         gid = pbgs_symbol_table.Lookup_or_Insert_Name<21>(source + start, hashvalue);
    279         break;
    280     case 22:
    281         gid = pbgs_symbol_table.Lookup_or_Insert_Name<22>(source + start, hashvalue);
    282         break;
    283     case 23:
    284         gid = pbgs_symbol_table.Lookup_or_Insert_Name<23>(source + start, hashvalue);
    285         break;
    286     case 24:
    287         gid = pbgs_symbol_table.Lookup_or_Insert_Name<24>(source + start, hashvalue);
    288         break;
    289     case 25:
    290         gid = pbgs_symbol_table.Lookup_or_Insert_Name<25>(source + start, hashvalue);
    291         break;
    292     case 26:
    293         gid = pbgs_symbol_table.Lookup_or_Insert_Name<26>(source + start, hashvalue);
    294         break;
    295     case 27:
    296         gid = pbgs_symbol_table.Lookup_or_Insert_Name<27>(source + start, hashvalue);
    297         break;
    298     case 28:
    299         gid = pbgs_symbol_table.Lookup_or_Insert_Name<28>(source + start, hashvalue);
    300         break;
    301     case 29:
    302         gid = pbgs_symbol_table.Lookup_or_Insert_Name<29>(source + start, hashvalue);
    303         break;
    304     case 30:
    305         gid = pbgs_symbol_table.Lookup_or_Insert_Name<30>(source + start, hashvalue);
    306         break;
    307     case 31:
    308         gid = pbgs_symbol_table.Lookup_or_Insert_Name<31>(source + start, hashvalue);
    309         break;
    310     case 32:
    311         gid = pbgs_symbol_table.Lookup_or_Insert_Name<32>(source + start, hashvalue);
    312         break;
    313     default:
    314         gid = pbgs_symbol_table.Lookup_or_Insert_Name(source + start, hashvalue, lgth);
    315         break;
    316     }
     181    int gid = pbgs_symbol_table.Lookup_or_Insert_Name(source + start, hashvalue, lgth);
    317182    gids.push_back(gid);
    318183#if DEBUG
     
    320185    strncpy ( symbol, source + start, lgth );
    321186    symbol[lgth] ='\0';
    322     printf ("%s | start: %i[%i] | end: %i[%i] | lgth: %i | hashvalue: %i | gid: %i | symbol: %s\n", __FUNCTION__, start, start - block_base, end, end - block_base, lgth, hashvalue, gid, symbol);
     187    printf ("%s | start: %i[%i] | lgth: %i | hashvalue: %i | gid: %i | symbol: %s\n", __FUNCTION__, start, start - block_base, lgth, hashvalue, gid, symbol);
    323188#endif
    324189    return 0;
    325190}
    326191
    327 // L = 4, pass in bitstream for symbols length [1,4]
    328 // L = 8, pass in bitstream for symbols length [5,8]
    329 // L = 12, pass in bitstream for symbols length [9,12]
    330 // L = 16, pass in bitstream for symbols length [13,16]
     192// L = 2, pass in bitstream for symbols length [1,2]
     193// L = 4, pass in bitstream for symbols length [3,4]
     194// L = 6, pass in bitstream for symbols length [5,6]
     195// L = 8, pass in bitstream for symbols length [7,8]
     196// L = 10, pass in bitstream for symbols length [9,10]
     197// L = 12, pass in bitstream for symbols length [11,12]
     198// L = 14, pass in bitstream for symbols length [13,14]
     199// L = 16, pass in bitstream for symbols length [15,16]
    331200// L = 17, pass in bitstream for symbols length longer than 16
    332201template <int L>
     
    346215}
    347216
    348 
    349 static inline void ReportError(const char * error_msg, int error_pos_in_block) {
    350   int error_line, error_column;
    351   tracker.get_Line_and_Column(error_pos_in_block, error_line, error_column);
    352   fprintf(stderr, "%s at line %i, column %i\n", error_msg, error_line, error_column);
     217template <>
     218inline int StreamScanLengthGrouping<17>(ScanBlock * stream, int blk_count) {
     219    int blk;
     220    int block_pos = 0;
     221    for (blk = 0; blk < blk_count; blk++) {
     222        ScanBlock s = stream[blk];
     223        while(s) {
     224            int start_pos = cfzl(s) + block_pos;
     225            int end_pos = ScanForwardPos (&elem_ends, start_pos);
     226            if (end_pos)
     227            {
     228                ElemStart_grouping(start_pos - 16, end_pos - start_pos + 16);
     229            }
     230            s = s & (s-1);  // clear rightmost bit.
     231        }
     232        block_pos += 8 * sizeof(ScanBlock);
     233    }
     234    return 0;
    353235}
    354236
     
    509391
    510392    tracker.StoreNewlines(lex.LF);
    511     elem_starts = tag_Callouts.ElemName_starts;
     393    elem_ends = tag_Callouts.ElemName_ends;
    512394    hashvalues[1] = hash_data.Hash_value;
    513395
    514     if ( bitblock_has_bit(tag_Callouts.ElemName_ends_1_to_4) )
    515     {
    516         StreamScanLengthGrouping<4>((ScanBlock *) &tag_Callouts.ElemName_ends_4, sizeof(BitBlock)/sizeof(ScanBlock));
    517     }
    518 
    519     if ( bitblock_has_bit(tag_Callouts.ElemName_ends_5_to_8) )
    520     {
    521         StreamScanLengthGrouping<8>((ScanBlock *) &tag_Callouts.ElemName_ends_8, sizeof(BitBlock)/sizeof(ScanBlock));
    522     }
    523 
    524     if ( bitblock_has_bit(tag_Callouts.ElemName_ends_9_to_12) )
    525     {
    526         StreamScanLengthGrouping<12>((ScanBlock *) &tag_Callouts.ElemName_ends_12, sizeof(BitBlock)/sizeof(ScanBlock));
    527     }
    528 
    529     if ( bitblock_has_bit(tag_Callouts.ElemName_ends_13_to_16) )
    530     {
    531         StreamScanLengthGrouping<16>((ScanBlock *) &tag_Callouts.ElemName_ends_16, sizeof(BitBlock)/sizeof(ScanBlock));
    532     }
    533 
    534     if ( bitblock_has_bit(tag_Callouts.ElemName_ends_17_and_longer) )
    535     {
    536         StreamScanLengthGrouping<17>((ScanBlock *) &tag_Callouts.ElemName_ends_17_and_longer, sizeof(BitBlock)/sizeof(ScanBlock));
    537     }
    538 
    539     // Store the last starting position in case we hit boundary case
    540     previous_block_last_elem_start = - count_backward_zeroes (elem_starts);
     396    // Check for block boundary case for length 16 and above
     397    if (block_boundary_case)
     398    {
     399#if DEBUG
     400        printf ("block boundary case! Special handle!\n");
     401#endif
     402        int lgth = count_forward_zeroes(elem_ends)-last_elem_start;
     403        int start = block_base + last_elem_start;
     404        int hashvalue = compute_hash_value(lgth, last_elem_start);
     405        int gid = pbgs_symbol_table.Lookup_or_Insert_Name(source + start, hashvalue, lgth);
     406        gids.push_back(gid);
     407#if DEBUG
     408        printf ("%s | start: %i[%i] | lgth: %i | hashvalue: %i | gid: %i \n", __FUNCTION__, start, start - block_base, lgth, hashvalue, gid);
     409#endif
     410        block_boundary_case = false;
     411    }
     412
     413    if ( bitblock_has_bit(tag_Callouts.ElemName_ends_1_to_2) )
     414    {
     415        StreamScanLengthGrouping<2>((ScanBlock *) &tag_Callouts.ElemName_ends_1_to_2, sizeof(BitBlock)/sizeof(ScanBlock));
     416    }
     417
     418    if ( bitblock_has_bit(tag_Callouts.ElemName_ends_3_to_4) )
     419    {
     420        StreamScanLengthGrouping<4>((ScanBlock *) &tag_Callouts.ElemName_ends_3_to_4, sizeof(BitBlock)/sizeof(ScanBlock));
     421    }
     422
     423    if ( bitblock_has_bit(tag_Callouts.ElemName_ends_5_to_6) )
     424    {
     425        StreamScanLengthGrouping<6>((ScanBlock *) &tag_Callouts.ElemName_ends_5_to_6, sizeof(BitBlock)/sizeof(ScanBlock));
     426    }
     427
     428    if ( bitblock_has_bit(tag_Callouts.ElemName_ends_7_to_8) )
     429    {
     430        StreamScanLengthGrouping<8>((ScanBlock *) &tag_Callouts.ElemName_ends_7_to_8, sizeof(BitBlock)/sizeof(ScanBlock));
     431    }
     432
     433    if ( bitblock_has_bit(tag_Callouts.ElemName_ends_9_to_10) )
     434    {
     435        StreamScanLengthGrouping<10>((ScanBlock *) &tag_Callouts.ElemName_ends_9_to_10, sizeof(BitBlock)/sizeof(ScanBlock));
     436    }
     437
     438    if ( bitblock_has_bit(tag_Callouts.ElemName_ends_11_to_12) )
     439    {
     440        StreamScanLengthGrouping<12>((ScanBlock *) &tag_Callouts.ElemName_ends_11_to_12, sizeof(BitBlock)/sizeof(ScanBlock));
     441    }
     442
     443    if ( bitblock_has_bit(tag_Callouts.ElemName_ends_13_to_14) )
     444    {
     445        StreamScanLengthGrouping<14>((ScanBlock *) &tag_Callouts.ElemName_ends_13_to_14, sizeof(BitBlock)/sizeof(ScanBlock));
     446    }
     447
     448    if ( bitblock_has_bit(tag_Callouts.ElemName_ends_15_to_16) )
     449    {
     450        StreamScanLengthGrouping<16>((ScanBlock *) &tag_Callouts.ElemName_ends_15_to_16, sizeof(BitBlock)/sizeof(ScanBlock));
     451    }
     452
     453    if ( bitblock_has_bit(tag_Callouts.ElemName_remaining_ends) )
     454    {
     455        StreamScanLengthGrouping<17>((ScanBlock *) &tag_Callouts.ElemName_remaining_ends, sizeof(BitBlock)/sizeof(ScanBlock));
     456    }
    541457
    542458    //copy current hash value data as previous one.
     
    572488    }
    573489
    574     if (bitblock_has_bit(check_streams.error_mask)) {
    575       int errpos = count_forward_zeroes(check_streams.error_mask);
    576       ReportError("error found", errpos);
    577         exit(-1);
    578     }
     490    error_tracker.If_Error_Report_First();
    579491
    580492    matcher.store_streams(check_streams.tag_marks, check_streams.name_follows, check_streams.misc_mask, chars_avail);
     
    766678        fclose(infile);
    767679        fclose(outfile);
    768 
    769         printf ("Done procressing\n");
    770680        return(0);
    771681}
  • trunk/lib/symtab/bitstream_hash_table.h

    r1278 r1387  
     1/*
     2 * Created: 2011
     3 * Author: Vera Lukman
     4 *
     5 * This hash table is almost an exact copy of HashSymbolTable except that
     6 * BitStreamDivHashTable accepts a hashvalue to insert or lookup for a key.
     7 * This hash table is designed specifically for Parallel Bitstream-Based Length Sorting
     8 * identity and log grouping strategy.
     9 *
     10 */
     11
    112#ifndef BITSTREAM_HASH_TABLE_H
    213#define BITSTREAM_HASH_TABLE_H
     
    718#define TABLE_SIZE 256
    819#define TABLE_SIZE_MASK TABLE_SIZE-1
    9 // This hash table is almost an exact copy of HashSymbolTable except that
    10 // BitStreamHashTable accepts a hashvalue to insert or lookup for a key.
    1120
    1221// Define either one of grouping functions
     
    172181}
    173182
     183// Use this method if L is in [3,4]
    174184inline unsigned int BitStreamHashTable::Lookup_Name_4(char * name, const int hashvalue, int L)
    175185{
     
    202212}
    203213
     214// Use this method if L is in [5,8]
    204215inline unsigned int BitStreamHashTable::Lookup_Name_8(char * name, const int hashvalue, int L)
    205216{
     
    226237}
    227238
     239// Use this method if L is in [9,16]
    228240inline unsigned int BitStreamHashTable::Lookup_Name_16(char * name, const int hashvalue, int L)
    229241{
     
    252264#endif // #if defined (USE_LOG_SORT)
    253265
     266
     267// Use this method if L is in [17,32]
    254268inline unsigned int BitStreamHashTable::Lookup_Name_32(char * name, const int hashvalue, int L)
    255269{
     
    276290}
    277291
     292// Use this method if L > 32
    278293inline unsigned int BitStreamHashTable::Lookup_Name(char * name, const int hashvalue, int L)
    279294{
  • trunk/lib/symtab/equal_compare.h

    r1278 r1387  
    11/*
    2  *      ls_symbol_table_compare.h
     2 *      equal_compare.h
    33 *
    4  *      Created on: 27-May-2010
    5  *      Author: ksherdy
     4 *      Created on: 8-August-2011
     5 *      Author: Vera Lukman
    66 *
    7  *      Length specific comparison functions using a C++ function object or a C++ function template approach.
     7 *      Length specific comparison functions using a C++ function object or a C++ function template approach for Parallel Bitstream-Based Length Sorting identity and log grouping strategy.
    88 */
    99
     
    2222
    2323// ----- ----- ----- ----- ----- ----- ----- ----- ----- ----- ----- ----- ----- ----- ----- -----
    24 // Equivalent symbol-at-a-time comparison methods implemented using C++ function templates.     
     24// Equivalent symbol-at-a-time comparison methods implemented using C++ function templates.
    2525
    26 template<class T>
    27 inline bool equal_compare(const unsigned char * key, const unsigned char * symbol, const unsigned int key_lgth, int symbol_lgth);
    28 
    29 #if defined (USE_IDENTITY_SORT) || defined (USE_DIV_SORT)
     26#if defined (USE_IDENTITY_SORT)
    3027//Use equal_compare<L> for L in [1,16]
    3128//Use equal_compare_32<L> for L in [17,32]
     
    5956                        ((* ((uint64_t *) symbol)) & (0xFFFFFFFFFFFFFFULL << LOW_BYTE_SHIFT))); // s8int64()
    6057}
    61 #endif // #if defined (USE_IDENTITY_SORT) || defined (USE_DIV_SORT) || defined(USE_TEMPLATE)
     58#endif // #if defined (USE_IDENTITY_SORT)
    6259
    63 #if defined (USE_LOG_SORT) && !defined (USE_TEMPLATE)
     60#if defined (USE_LOG_SORT)
    6461inline bool equal_compare_1(const unsigned char * key, const unsigned char * symbol) {
    6562    return compare<1>(key, symbol);
     
    7875
    7976template<class T>
     77inline bool equal_compare(const unsigned char * key, const unsigned char * symbol, const unsigned int key_lgth, int symbol_lgth);
     78
     79// if key_lgth, symbol_lgth in [3,4], T = uint16_t
     80// if key_lgth, symbol_lgth in [5,8], T = uint32_t
     81// if key_lgth, symbol_lgth in [9,16], T = uint64_t
     82// if key_lgth, symbol_lgth in [17,32], T = SIMD_type
     83template<class T>
    8084inline bool equal_compare(const unsigned char * key, const unsigned char * symbol, const unsigned int key_lgth, int symbol_lgth)
    8185{
     
    9397#endif
    9498}
    95 
    96 #endif //#if defined (USE_LOG_SORT) && !defined (USE_TEMPLATE)
    9799
    98100template<>
     
    116118}
    117119
     120#endif //#if defined (USE_LOG_SORT)
     121
    118122#endif /* EQUAL_COMPARE_H_ */
  • trunk/lib/symtab/pbgs_div_symbol_table.h

    r1229 r1387  
     1/*
     2 * Created on: 8-August-2011
     3 * Author: Vera Lukman
     4 *
     5 * This symbol table is designed specifically for Parallel Bitstream-Based Length Sorting
     6 * Division by 2 grouping strategy.
     7 *
     8 */
     9
    110#ifndef PBGS_DIV_SYMBOL_TABLE_H
    211#define PBGS_DIV_SYMBOL_TABLE_H
     
    716//#define USE_LENGTH_SORT       // f(L) = L
    817//#define USE_LOG_SORT          // f(L) = ceil (log(L))
    9 #define USE_DIV_SORT            // f(L) = floor( (L-1)/2^k )
     18#define USE_DIV_SORT            // f(L) = floor ((L+1)/2)
    1019
    11 #define TOTAL_GROUPS 5
     20#define TOTAL_GROUPS 10
    1221#define LAST_GROUP TOTAL_GROUPS-1
    1322
    1423#define USE_FUNCTION_TEMPLATES
    1524#include "stringpool.h"
    16 #include "bitstream_hash_table.h"
     25#include "bitstream_div_hash_table.h"
    1726#include "ls_symbol_table_compare.h"
    1827#include "ls_symbol_table_util.h"
     
    2231using namespace std;
    2332#endif
     33                           //0     1     2    3     4    5    6    7
     34static char delimiters[] = {' ', '\0', '\0', ';', '\0', '=', '>', '/'};
    2435
    2536class PBGSDivSymbolTable
     
    3243    template <int L> inline int Lookup_or_Insert_Name(char* name, int hashvalue);
    3344
     45    inline int Lookup_or_Insert_Name_32(char* name, int hashvalue, int L);
     46
    3447    inline int Lookup_or_Insert_Name(char* name, int hashvalue, int L);
    3548
    3649private:
    3750    StringPool <4096,100> m_pool;
    38     BitStreamHashTable m_hashTable[TOTAL_GROUPS];
     51    BitStreamDivHashTable m_hashTable[TOTAL_GROUPS];
     52    int m_globalNameCount;
    3953
    4054    inline int getHashTableIndex(int L);
    41     int m_globalNameCount;
     55    inline bool isDelimiter(const char c);
    4256};
    4357
    44 template <>
    45         inline int PBGSDivSymbolTable::Lookup_or_Insert_Name<3>(char* name, int hashvalue)
    46 {
    47     int GID = 0;
    48 
    49     char delim = name[3];
    50     name[3] = '\0';
    51     int group = getHashTableIndex(3);
    52 
    53     //Lookup
    54     GID = m_hashTable[group].Lookup_Name<3>(name, hashvalue);
    55     name[3] = delim;
    56 
    57     //Insert
    58     if (!GID) //symbol not found
    59     {
    60         char* c = m_pool.Insert(name, 3, 1);
    61         GID = m_globalNameCount;
    62 
    63         m_hashTable[group].Insert_Name(c, hashvalue, m_globalNameCount);
    64         m_globalNameCount++;
    65     }
    66 #if DEBUG_PBGS_DIV
    67     int advance = 1;
    68     int L = 3;
    69     delim = name[L];
    70     name[L] = '\0';
    71     cout << "Lookup or Insert: " << name << " | lgth: " << L << " | group: " << group << " | advance: " << advance << endl;
    72     name[L] = delim;
    73 #endif
    74 
    75     return GID;
    76 }
    77 
    78 template <>
    79         inline int PBGSDivSymbolTable::Lookup_or_Insert_Name<5>(char* name, int hashvalue)
    80 {
    81     int GID = 0;
    82 
    83     char delim = name[5];
    84     name[5] = '\0';
    85     int group = getHashTableIndex(5);
    86 
    87     //Lookup
    88     GID = m_hashTable[group].Lookup_Name<5>(name, hashvalue);
    89     name[5] = delim;
    90 
    91     //Insert
    92     if (!GID) //symbol not found
    93     {
    94         char* c = m_pool.Insert(name, 5, 3);
    95         GID = m_globalNameCount;
    96 
    97         m_hashTable[group].Insert_Name(c, hashvalue, m_globalNameCount);
    98         m_globalNameCount++;
    99     }
    100 #if DEBUG_PBGS_DIV
    101     int advance = 3;
    102     int L = 5;
    103     delim = name[L];
    104     name[L] = '\0';
    105     cout << "Lookup or Insert: " << name << " | lgth: " << L << " | group: " << group << " | advance: " << advance << endl;
    106     name[L] = delim;
    107 #endif
    108 
    109     return GID;
    110 }
    111 
    112 template <>
    113         inline int PBGSDivSymbolTable::Lookup_or_Insert_Name<6>(char* name, int hashvalue)
    114 {
    115     int GID = 0;
    116 
    117     char delim = name[6];
    118     name[6] = '\0';
    119     int group = getHashTableIndex(6);
    120 
    121     //Lookup
    122     GID = m_hashTable[group].Lookup_Name<6>(name, hashvalue);
    123     name[6] = delim;
    124 
    125     //Insert
    126     if (!GID) //symbol not found
    127     {
    128         char* c = m_pool.Insert(name, 6, 2);
    129         GID = m_globalNameCount;
    130 
    131         m_hashTable[group].Insert_Name(c, hashvalue, m_globalNameCount);
    132         m_globalNameCount++;
    133     }
    134 #if DEBUG_PBGS_DIV
    135     int advance = 2;
    136     int L = 6;
    137     delim = name[L];
    138     name[L] = '\0';
    139     cout << "Lookup or Insert: " << name << " | lgth: " << L << " | group: " << group << " | advance: " << advance << endl;
    140     name[L] = delim;
    141 #endif
    142 
    143     return GID;
    144 }
    145 
    146 template <>
    147         inline int PBGSDivSymbolTable::Lookup_or_Insert_Name<7>(char* name, int hashvalue)
    148 {
    149     int GID = 0;
    150 
    151     char delim = name[7];
    152     name[7] = '\0';
    153     int group = getHashTableIndex(7);
    154 
    155     //Lookup
    156     GID = m_hashTable[group].Lookup_Name<7>(name, hashvalue);
    157     name[7] = delim;
    158 
    159     //Insert
    160     if (!GID) //symbol not found
    161     {
    162         char* c = m_pool.Insert(name, 7, 1);
    163         GID = m_globalNameCount;
    164 
    165         m_hashTable[group].Insert_Name(c, hashvalue, m_globalNameCount);
    166         m_globalNameCount++;
    167     }
    168 #if DEBUG_PBGS_DIV
    169     int advance = 1;
    170     int L = 7;
    171     delim = name[L];
    172     name[L] = '\0';
    173     cout << "Lookup or Insert: " << name << " | lgth: " << L << " | group: " << group << " | advance: " << advance << endl;
    174     name[L] = delim;
    175 #endif
    176 
    177     return GID;
    178 }
    179 
    180 // Use this function for 1,2, 4, 8 <= L <= 32
     58// L should be an even number, L is in [1,16]
    18159template <int L> inline int PBGSDivSymbolTable::Lookup_or_Insert_Name(char* name, int hashvalue)
    18260{
    18361    int GID = 0;
    184 
    185     char delim = name[L];
    186     name[L] = '\0';
    18762    int group = getHashTableIndex(L);
    18863
    18964    //Lookup
    19065    GID = m_hashTable[group].Lookup_Name<L>(name, hashvalue);
    191     name[L] = delim;
    19266
    193     //Insert
    19467    if (!GID) //symbol not found
    19568    {
     69        //Check if the last character is a delimiter
     70        if (isDelimiter(name[L-1]))
     71        {
     72            //Do another lookup for name with L = L-1
     73            GID = m_hashTable[group].Lookup_Name_Delimiter<L-1>(name, hashvalue);
     74
     75            if (GID)
     76            {
     77                //Symbol found, return immediately
     78                return GID;
     79            }
     80        }
     81
     82        //Insert
    19683        char* c = m_pool.Insert(name, L);
    19784        GID = m_globalNameCount;
     
    20188    }
    20289#if DEBUG_PBGS_DIV
    203     delim = name[L];
     90    char delim = name[L];
    20491    name[L] = '\0';
    20592    cout << "Lookup or Insert: " << name << " | lgth: " << L << " | group: " << group << endl;
     
    21097}
    21198
    212 // Use this function for L > 32
     99// Use this function for L > 16
    213100inline int PBGSDivSymbolTable::Lookup_or_Insert_Name(char* name, int hashvalue, int L)
    214101{
    215102    int GID = 0;
    216 
    217     char delim = name[L];
    218     name[L] = '\0';
    219103
    220104#if DEBUG_PBGS_DIV
     
    225109    //Lookup
    226110    GID = m_hashTable[group].Lookup_Name(name, hashvalue,L);
    227 
    228     name[L] = delim;
    229111
    230112    //Insert
     
    240122}
    241123
     124
     125inline bool PBGSDivSymbolTable::isDelimiter(const char c)
     126{
     127    return c == delimiters[(unsigned int) c & 0x7];
     128}
     129
    242130inline int PBGSDivSymbolTable::getHashTableIndex(int L)
    243131{
     
    245133        return LAST_GROUP;
    246134    else
    247         return L/4;
     135        return (L+1)/2;
    248136}
    249137#endif // PBGS_DIV_SYMBOL_TABLE_H
  • trunk/lib/symtab/pbgs_identity_symbol_table.h

    r1277 r1387  
     1/*
     2 * Created: 2011
     3 * Author: Vera Lukman
     4 *
     5 * This symbol table is designed specifically for Parallel Bitstream-Based Length Sorting
     6 * Identity grouping strategy.
     7 *
     8 */
     9
    110#ifndef PBGS_LENGTH_SYMBOL_TABLE_H
    211#define PBGS_LENGTH_SYMBOL_TABLE_H
  • trunk/lib/symtab/pbgs_log_symbol_table.h

    r1278 r1387  
     1/*
     2 * Created: 2011
     3 * Author: Vera Lukman
     4 *
     5 * This symbol table is designed specifically for Parallel Bitstream-Based Length Sorting
     6 * Log grouping strategy.
     7 *
     8 */
     9
     10
    111#ifndef PBGS_SYMBOL_TABLE_H
    212#define PBGS_SYMBOL_TABLE_H
Note: See TracChangeset for help on using the changeset viewer.