Changeset 1969


Ignore:
Timestamp:
Mar 25, 2012, 12:11:27 AM (7 years ago)
Author:
ksherdy
Message:

Restructuring.

Location:
trunk/symbol_table
Files:
1 added
2 deleted
2 edited

Legend:

Unmodified
Added
Removed
  • trunk/symbol_table/main_template.cpp

    r1960 r1969  
    33 * Author: Ken Herdy
    44 *
    5  * Quick and dirty.
     5 * Length sorted symbol table main.
     6 *
     7 * Lookahead versus Lookback
     8 *
     9 * The current implementation applies bit stream length grouping based on 'end' markers.
     10 * In a sense, 'end' markers are precomputed 'lookahead'.
     11 * True 'lookahead' would compute the current block and number of 'lookahead' position and
     12 * support 'shift back' and to mark the 'start' rather than the 'end' positions of lexical items.
     13 *
     14 * In any case, the current implementation 'expects' that the previous block will be located in a contiguous
     15 * memory location that may be indexed as some negative offset of the base address of the current
     16 * block.
     17 *
     18 * Further, to reduce complexity in processing, although structs of BitBlock types are not stored
     19 * contiguously in memory, BitBlock struct members are copied into contiguous memory positions.
     20 *
     21 * Design Issues
     22 *
     23 * (1) Max hash table size.
     24 * (2) Negative shift values.
     25 * (3) Template classes on Length L.
    626 *
    727 */
     
    1232#include "../lib/s2p.hpp"
    1333#include "../lib/perflib/perfsec.h"
    14 
    15 // GENERATED
    16 #include "marker_strms.hpp"
    17 // GENERATED
    18 #include "hash_strms.hpp"
    19 // GENERATED
    20 #include "id_group_strms.hpp"
    21 
     34#include "marker_strms.hpp"     // GENERATED HEADER
     35#include "hash_strms.hpp"       // GENERATED HEADER
     36#include "id_group_strms.hpp"   // GENERATED HEADER
    2237#include "id_symbol_table.hpp"
    23 
    2438#include <string>
    2539#include <iostream>
     
    4155#endif
    4256
    43 typedef struct symbol: public AoS_symbol
    44 {
    45     string name;
    46     char d;
    47 } symbol;
    48 
     57///////////////////////////////////////////////////////////////////////////
     58// Buffer Management
     59///////////////////////////////////////////////////////////////////////////
    4960#define PADDING_BLOCKS 1
    5061#define PADDING_SIZE BLOCK_SIZE * PADDING_BLOCKS
    5162#define LOOKBACK_BLOCKS 1
    5263#define LOOKBACK_SIZE BLOCK_SIZE * LOOKBACK_BLOCKS
    53 #define SEGMENT_BLOCKS 8
    54 #define SEGMENT_SIZE BLOCK_SIZE * (SEGMENT_BLOCKS) // multiple of BLOCK_SIZE (bytes)
     64#define SEGMENT_BLOCKS 10
     65#define SEGMENT_SIZE BLOCK_SIZE * (SEGMENT_BLOCKS)                                          // (bytes) a multiple of BLOCK_SIZE
     66#define SEGMENT_ALLOC_SIZE (LOOKBACK_SIZE + SEGMENT_SIZE + PADDING_SIZE) / sizeof(BitBlock) // (bytes)
     67
     68// Target symbol type must inherit from AoS_symbol
     69typedef struct symbol: public AoS_symbol
     70{
     71    string parameter_1;
     72    string parameter_2;
     73    bool parameter_3;
     74} symbol;
    5575
    5676int main(int argc, char * argv[]) {
     
    7595    PERF_SEC_INIT(parser_timer);
    7696
    77     /* Byte Buffer */
    78     BitBlock aligned_buffer[(LOOKBACK_SIZE + SEGMENT_SIZE + PADDING_SIZE) / sizeof(BitBlock)];
     97    ///////////////////////////////////////////////////////////////////////////
     98    // Stream Definitions
     99    ///////////////////////////////////////////////////////////////////////////
     100
     101    // Byte Segments - Raw byte streams - With lookback.
     102    BitBlock aligned_buffer[SEGMENT_ALLOC_SIZE];
    79103    uint8_t * lookback = (uint8_t *)aligned_buffer;
    80104    memset(lookback,0,LOOKBACK_SIZE);
    81     uint8_t * raw_buffer = lookback + LOOKBACK_SIZE;
    82 
    83     /* Bit Stream Hash Buffers */
    84 
    85     // TODO - Verify
    86 
    87     /* h0 */
    88     BitBlock aligned_h0[(LOOKBACK_SIZE + SEGMENT_SIZE + PADDING_SIZE) / sizeof(BitBlock)];
     105    uint8_t * raw_buffer = &lookback[LOOKBACK_SIZE];
     106
     107    // Bit Segments - Hash bit streams - With lookback.
     108
     109    // hash 0
     110    BitBlock aligned_h0[SEGMENT_ALLOC_SIZE/8];
    89111    BitBlock * lookback_h0 = (BitBlock *) aligned_h0;
    90     memset(lookback_h0,0,sizeof(BitBlock));
    91     BitBlock * h0 = (BitBlock *)(lookback_h0 + 1); // TODO - Correct
    92 
    93     /* h1 */
    94     BitBlock aligned_h1[(LOOKBACK_SIZE + SEGMENT_SIZE + PADDING_SIZE) / sizeof(BitBlock)];
     112    memset(lookback_h0,0,LOOKBACK_SIZE/BLOCK_SIZE);
     113    BitBlock * h0 = &lookback_h0[LOOKBACK_SIZE/BLOCK_SIZE];
     114
     115    // hash 1
     116    BitBlock aligned_h1[SEGMENT_ALLOC_SIZE/8];
    95117    BitBlock * lookback_h1 = (BitBlock *) aligned_h1;
    96     memset(lookback_h1,0,sizeof(BitBlock));
    97     BitBlock * h1 = (BitBlock *)(lookback_h1 + 1); // TODO - Correct
    98 
    99     /* BitSteams */
     118    memset(lookback_h1,0,LOOKBACK_SIZE/BLOCK_SIZE);
     119    BitBlock * h1 = &lookback_h1[LOOKBACK_SIZE/BLOCK_SIZE];
     120
     121    // BitSteams - Without lookback
    100122    Basis_bits basis_bits[SEGMENT_BLOCKS];
    101123    Markers markers[SEGMENT_BLOCKS];
     
    103125    Groups groups[SEGMENT_BLOCKS];
    104126
    105     /* Symbol Table */
     127    // Symbol Table
    106128    const uint32_t SYMBOL_COUNT = SEGMENT_SIZE;
    107129    symbol symbol_ary[SYMBOL_COUNT];
     
    111133    uint32_t chars_avail = is.gcount();
    112134
     135    ///////////////////////////////////////////////////////////////////////////
     136    // Full Segments
     137    ///////////////////////////////////////////////////////////////////////////
    113138    while (chars_avail >= SEGMENT_SIZE) {
    114139
    115140      uint32_t blk;
    116141      for(blk=0;blk<SEGMENT_BLOCKS;blk++) {
    117           s2p_do_block((BytePack *) &raw_buffer[blk*BLOCK_SIZE], basis_bits[blk]);
    118           markers_do_block(basis_bits[blk], markers[blk]);
    119           hash_strms_do_block(basis_bits[blk], hash[blk]);
    120           identity_group_do_block(markers[blk], groups[blk]);
     142        s2p_do_block((BytePack *) &raw_buffer[blk*BLOCK_SIZE], basis_bits[blk]);    // transpose
     143        markers_do_block(basis_bits[blk], markers[blk]);                            // gen symbol spans, mark starts & follows
     144        hash_strms_do_block(basis_bits[blk], hash[blk]);                            // gen hash bit streams
     145        identity_group_do_block(markers[blk], groups[blk]);                         // sort marker bit stream (identity)
    121146      }
    122147
    123 //      for(int k=0;k<SEGMENT_BLOCKS;k++) {
     148//    for(int k=0;k<SEGMENT_BLOCKS;k++) {
    124149//        cout << "RAW " << string((((char*)&raw_buffer[0])+k*BLOCK_SIZE),BLOCK_SIZE) << endl;
    125 //      }
    126 
    127       /* Write contiguous hash bit streams */
    128       for(int blk=0;blk<SEGMENT_BLOCKS;blk++) {
    129           h0[blk] = hash[blk].h0;
    130           h1[blk] = hash[blk].h1;
     150//    }
     151
     152      for(int blk=0;blk<SEGMENT_BLOCKS;blk++) { // write contiguous hash bit streams
     153        h0[blk] = hash[blk].h0;
     154        h1[blk] = hash[blk].h1;
    131155      }
    132156
     
    135159      PERF_SEC_END(parser_timer, SEGMENT_SIZE);
    136160
    137       // print_register("h1[S]",h1[SEGMENT_BLOCKS-1]);
    138 
    139       memmove(lookback,raw_buffer+SEGMENT_SIZE-LOOKBACK_SIZE,LOOKBACK_SIZE); /* copy final block to lookback */
    140       //memmove(lookback_h0,((uint8_t *)h0)+((SEGMENT_SIZE-LOOKBACK_SIZE)/sizeof(BitBlock)),sizeof(BitBlock)); /* copy final block to lookback */
    141       //memmove(lookback_h1,((uint8_t *)h1)+((SEGMENT_SIZE-LOOKBACK_SIZE)/sizeof(BitBlock)),sizeof(BitBlock)); /* copy final block to lookback */
    142 
    143       lookback_h0[0] = h0[SEGMENT_BLOCKS-1];
    144       lookback_h1[0] = h1[SEGMENT_BLOCKS-1];
    145       //print_register<BitBlock>("h1[S]",*(BitBlock *)&lookback_h1[0]);
    146 
    147       //exit(1);
    148 
     161      // copy loopback bytes
     162      memmove(lookback,&raw_buffer[SEGMENT_SIZE-LOOKBACK_SIZE],LOOKBACK_SIZE);
     163      // copy loopback bits
     164      memmove(lookback_h0,&((uint8_t *)h0)[(SEGMENT_SIZE-LOOKBACK_SIZE)/8],LOOKBACK_SIZE/8);
     165      memmove(lookback_h1,&((uint8_t *)h1)[(SEGMENT_SIZE-LOOKBACK_SIZE)/8],LOOKBACK_SIZE/8);
     166
     167      //lookback_h0[0] = h0[SEGMENT_BLOCKS-1];
     168      //lookback_h1[0] = h1[SEGMENT_BLOCKS-1];
    149169      is.read ((char *)(raw_buffer), SEGMENT_SIZE);
    150170      chars_avail = is.gcount();
    151 
    152     }
    153 
    154     //PERF_SEC_START(parser_timer);
    155     /* Partial Segments */
    156 
     171    }
     172
     173    /* Resolve Partial Segments */
    157174    uint32_t remaining = chars_avail;
    158175
    159     /* Full Blocks */
     176    ///////////////////////////////////////////////////////////////////////////
     177    // Full blocks
     178    ///////////////////////////////////////////////////////////////////////////
    160179    uint32_t blk = 0;
    161180    while (remaining >= BLOCK_SIZE) {
     
    168187    }
    169188
    170     /* Partial Block or carry */
     189    ///////////////////////////////////////////////////////////////////////////
     190    // Final partial block or any carry
     191    ///////////////////////////////////////////////////////////////////////////
    171192    if (remaining > 0 || @marker_strms_any_carry /*|| hash_strms_any_carry*/) {
    172           BitBlock EOF_mask = bitblock::srl(simd<1>::constant<1>(), convert(BLOCK_SIZE-remaining)); /* null padding byte */
     193          BitBlock EOF_mask = bitblock::srl(simd<1>::constant<1>(), convert(BLOCK_SIZE-remaining));
    173194          s2p_do_final_block((BytePack *) &raw_buffer[blk*BLOCK_SIZE], basis_bits[blk], EOF_mask);
    174195          markers_do_final_block(basis_bits[blk], markers[blk], EOF_mask);
     
    182203//    }
    183204
    184     /* Write contiguous hash bit streams */
     205
    185206    uint32_t segment_size = blk;
    186 
    187     for(int blk=0;blk<segment_size;blk++) {
     207    for(int blk=0;blk<segment_size;blk++) { // write contiguous hash bit streams
    188208        h0[blk] = hash[blk].h0;
    189209        h1[blk] = hash[blk].h1;
    190210    }
    191211
    192     PERF_SEC_START(parser_timer);
     212    //PERF_SEC_START(parser_timer);
    193213    symbol_table.resolve(raw_buffer, groups, h0, h1, blk, symbol_ary, SYMBOL_COUNT);
    194     PERF_SEC_END(parser_timer, chars_avail+1);
    195 
    196     /* WARNING */
    197     // if(remaining==0 && @markers_any_cary) {
    198     // @ _any_carry - equivalent to single byte look ahead
    199     // Any case in which we have a partial block, we need to know the boundary of the partial block to know when to store carry information.
    200     // Any case in which we must evaluate a bit value at position 'one past a boudary' can be handled within final block logic.
    201     // }
    202 
    203     // PBS Modules
    204     // Pablo block processing structure.
    205     // Four Cases --- Move to xml.dnsdojo.com
    206     // do_init_block(), do_block(), do_final_block(), do_all()
    207     // Four cases
    208     // - do_init_block() restrict to initialization only, do_block() does not execute, do_final_block() executes
    209 
    210     // Process full segments in sub modules, ie. sizeof(BitBlock) * 8 bytes,
    211     // do_segment()
    212     // - do_segment(uint8_t * buffer, BitBlock * strm_1, ..., BitBlock * strm_k, uint32_t byte_count)
    213     //
    214     // Handle 'while(segments)', while(full blocks), if(partial or carry) on the main application module.
    215 
    216     // Lookahead/Lookback
    217     // Current implementation bit stream length grouping strategy leverage 'end' markers.
    218     // 'end' markers in a sense are precomputed 'lookahead'
    219     // True 'lookahead' would compute the current block and number of 'lookahead' position to
    220     // support 'shift back' and the mark the 'start' rather than the 'end' positions of lexical items.
    221 
    222     // The current implementation 'expects' that the previous block will be located in a contiguous
    223     // memory location that may be indexed as some negative offset of the base address of the current
    224     // block.
    225 
    226     // Max hash table size, negative shift values.
    227     // Both in symbols that cross boundaries as well as in hash_strategy classes, hard coding.
    228     // Template parameters on Length L,
    229     // Bits vs. Bytes?
     214    //PERF_SEC_END(parser_timer, chars_avail+1);
    230215
    231216    PERF_SEC_DUMP(parser_timer);
  • trunk/symbol_table/marker_strms.py

    r1960 r1969  
    55
    66# Calculate Symbol spans
     7# markers.spans = [-_:.a-zA-Z0-9] character set compiler def'n, matches 'st_test_file_generator.py' def'n
    78def Classify_markers(basis_bits, markers):
    89    temp1 = (basis_bits.bit_0 | basis_bits.bit_1)
     
    2021    temp13 = (temp10 & temp12)
    2122    temp14 = (temp7 | temp13)
    22     temp15 = (basis_bits.bit_6 &~ basis_bits.bit_7)
    23     temp16 = (temp4 & temp15)
    24     temp17 = (temp3 & temp16)
    25     temp18 = (temp14 | temp17)
    26     temp19 = (basis_bits.bit_2 & basis_bits.bit_3)
    27     temp20 = (temp19 &~ temp1)
    28     temp21 = (basis_bits.bit_5 | temp11)
    29     temp22 = (basis_bits.bit_4 & temp21)
    30     temp23 = (temp20 &~ temp22)
    31     temp24 = (temp18 | temp23)
    32     temp25 = (temp8 &~ basis_bits.bit_2)
    33     temp26 = (~temp22)
    34     temp27 = (basis_bits.bit_4 | basis_bits.bit_5)
    35     temp28 = (basis_bits.bit_6 | basis_bits.bit_7)
    36     temp29 = (temp27 | temp28)
    37     temp30 = ((basis_bits.bit_3 & temp26)|(~(basis_bits.bit_3) & temp29))
    38     temp31 = (temp25 & temp30)
    39     temp32 = (temp24 | temp31)
    40     temp33 = (temp8 & basis_bits.bit_2)
    41     temp34 = (temp33 & temp30)
    42     markers.spans = (temp32 | temp34)
     23    temp15 = (basis_bits.bit_2 & basis_bits.bit_3)
     24    temp16 = (temp15 &~ temp1)
     25    temp17 = (basis_bits.bit_4 &~ basis_bits.bit_5)
     26    temp18 = (basis_bits.bit_6 &~ basis_bits.bit_7)
     27    temp19 = (temp17 & temp18)
     28    temp20 = (temp16 & temp19)
     29    temp21 = (temp14 | temp20)
     30    temp22 = (temp4 & temp18)
     31    temp23 = (temp3 & temp22)
     32    temp24 = (temp21 | temp23)
     33    temp25 = (temp8 & basis_bits.bit_2)
     34    temp26 = (basis_bits.bit_5 | temp11)
     35    temp27 = (basis_bits.bit_4 & temp26)
     36    temp28 = (~temp27)
     37    temp29 = (basis_bits.bit_4 | basis_bits.bit_5)
     38    temp30 = (basis_bits.bit_6 | basis_bits.bit_7)
     39    temp31 = (temp29 | temp30)
     40    temp32 = ((basis_bits.bit_3 & temp28)|(~(basis_bits.bit_3) & temp31))
     41    temp33 = (temp25 & temp32)
     42    temp34 = (temp24 | temp33)
     43    temp35 = (temp8 &~ basis_bits.bit_2)
     44    temp36 = (temp35 & temp32)
     45    temp37 = (temp34 | temp36)
     46    temp38 = (basis_bits.bit_5 | basis_bits.bit_6)
     47    temp39 = (basis_bits.bit_4 & temp38)
     48    temp40 = (temp16 &~ temp39)
     49    markers.spans = (temp37 | temp40)
     50
    4351
    4452
Note: See TracChangeset for help on using the changeset viewer.