- Timestamp:
- May 17, 2012, 1:55:37 PM (7 years ago)
- Location:
- trunk/symbol_table
- Files:
-
- 8 edited
Legend:
- Unmodified
- Added
- Removed
-
trunk/symbol_table/Makefile
r2106 r2113 39 39 MAIN_OUTFILE=src/main.cpp 40 40 41 ########################################################################### 42 # Symbol Table Strategy Builds 43 ########################################################################### 44 41 45 id: markers hash id_group src/main.cpp 42 46 python $(PABLO_COMPILER) $(MARKER_PABLO_SRC) -t $(MAIN_OUTFILE) -l $(MARKER_PREFIX) -o $(MAIN_OUTFILE) $(PABLO_ADD_DEBUG) … … 53 57 bit_byte: markers hash bit_byte_group src/main.cpp 54 58 python $(PABLO_COMPILER) $(MARKER_PABLO_SRC) -t $(MAIN_OUTFILE) -l $(MARKER_PREFIX) -o $(MAIN_OUTFILE) $(PABLO_ADD_DEBUG) 59 # 60 ########################################################################### 55 61 56 62 markers: marker_strms.py marker_strms_template.hpp -
trunk/symbol_table/README.txt
r2111 r2113 1 Author: Ken Herdy 2 Created on: May 17, 2012 1 3 2 symbol_table 3 âââ Compiler 4 âââ demo_strms.py 5 âââ bitutil.py 6 â 7 âââ bit_byte_group_strms.py 8 âââ div2_group_strms.py 9 âââ div2_logbase2_group_strms.py 10 âââ id_group_strms.py 11 âââ logbase2_group_strms.py 12 âââ group_strms_template.hpp 13 â 14 âââ hash_strms.py 15 âââ hash_strms_template.hpp 16 â 17 âââ main_template.cpp 18 âââ lib 19 âââ libtest 20 â âââ bitscantest.cpp 21 â âââ byte_compare_generator.cpp 22 â âââ byte_compare_template.cpp 23 â âââ hash_test.cpp 24 â âââ Makefile 25 â âââ pool_test.cpp 26 â 27 âââ marker_strms.py 28 âââ marker_strms_template.hpp 4 ABOUT 29 5 30 âââ Makefile 31 âââ README.txt 6 This project builds length sorted symbol table test driver(s) 7 to validate symbol table correctness and evaluate performance. 32 8 33 âââ src 34 â âââ basis_bits.hpp 35 â âââ buffer.hpp 36 â âââ byte_pool.hpp 37 â âââ compare_strategy.hpp 38 â âââ gid.hpp 39 â âââ group_strms.hpp 40 â âââ hash_strategy.hpp 41 â âââ hash_strms.hpp 42 â âââ hash_table.hpp 43 â âââ Makefile 44 â âââ marker_strms.hpp 45 â âââ strategy_types.hpp 46 â âââ symbol_table.hpp 47 â âââ transpose.hpp 48 âââ test 49 â âââ gen_test_file.py 50 â âââ Makefile 51 â âââ pin 52 â â âââ (L_O_U)_1_1.test 53 â âââ pout 54 â â âââ (L_O_U)_1_1.test 55 â âââ run_tests.py 56 â âââ st_test_file_generator.py 57 âââ transpose.hpp 9 The test driver is structured to slurp and process a entire 10 'generated' CSV test file on execution. 11 12 Test files are generated using the python script 13 'test/gen_test_file.py' or 'test/st_test_file_generator.py'. 14 15 Test files: 16 17 (i) begin with and separate each symbol with a single comma, 18 (ii) contain symbols characters defined by the the character class [-a-zA-Z0-9_:.], 19 (iii) do not contain EOF characters. Avoid hand editing test files 20 within applications that append EOF characters, 21 such as gedit, or 'diff' tests report missing newline differences. 
22 23 HOW TO BUILD 24 25 This project currently supports symbol driver 26 builds for each of the following length group strategies. 27 28 id G(L) = L 29 div2 G(L) = ceiling(L/2) 30 logbase2 G(L) = logbase2(L) 31 div2_logbase2 G(L) = div2(L) if L < k, k is even else logbase2(L) 32 bit_byte G(L) = 7 if L < 8 else G(L) = 0 33 34 This project adopts a two-stage build process in which, 35 36 (i) the root directory Makefile compiles parallel bit stream source files to C++, and 37 (ii) the src directory Makefile compiles generated C++ code. 38 39 --- PERF BUILD --- 40 41 Builds symbol table driver for performance profiling. 42 43 cd symbol_table 44 make {id, div2, logbase2, div2_logbase2, bit_byte} 45 cd src 46 symbol_table/src/Makefile make {id, div2, logbase2, div2_logbase2, bit_byte} 47 ./id ../test/pin/\(L,0,U\)+_1_1.test 48 49 For example, 50 51 make id 52 cd src/ 53 make id 54 ./id ../test/pin/\(1_1000_50\)_\(2_1000_50\)_\(3_1000_50\)_\(4_1000_50\)_1_1.test 55 BOM 16: 1 (avg time: 43735 cyc/kElem) Cumulative: 1 (avg: 43735 cyc/kElem) 56 57 58 --- GID TEST --- 59 60 A rudimentary test that evaluates that the correct number of unique GIDs 61 are allocated for each *.test file in the test/pin directory. 62 63 For example, 64 65 make id 66 cd src/ 67 make id_gid_test 68 69 . 70 . 71 . 72 (9_1000_50)_(10_1000_50)_(11_1000_50)_(12_1000_50)_1_1.test 73 Expected: 200 74 Actual: 200 75 . 76 . 77 . 78 All pass. 79 80 NOTE: 81 82 File format: (Length, Occurrences, Unique)...(Length, Occurrences, Unique)_1_1.test 83 84 --- DIFF TEST --- 85 86 A round trip test that for each *.test file in the test/pin directory: 87 (i) resolves the unique symbol gid for each symbol, 88 (ii) writes the gid to the corresponding symbol starts position in the occurrences array, 89 (iii) looks up the gid for each symbol start position, 90 (iv) generates an output file to the test/pout directory, and 91 (v) diffs test/pin and test/pout. 
92 93 make id 94 cd src/ 95 make id_diff_test 96 97 diff -rq --exclude=".svn" ../test/pin ../test/pout || exit 0 98 99 NOTE: No differences reported indicates all diff tests pass. 100 101 102 PROJECT STRUCTURE 103 104 symbol_table 105 â 106 âââ Compiler 107 âââ demo_strms.py 108 âââ bitutil.py 109 â 110 âââ bit_byte_group_strms.py 111 âââ div2_group_strms.py 112 âââ div2_logbase2_group_strms.py 113 âââ id_group_strms.py 114 âââ logbase2_group_strms.py 115 âââ group_strms_template.hpp 116 â 117 âââ hash_strms.py 118 âââ hash_strms_template.hpp 119 â 120 âââ main_template.cpp 121 â 122 âââ marker_strms.py 123 âââ marker_strms_template.hpp 124 â 125 âââ Makefile 126 âââ README.txt 127 â 128 âââ src 129 â âââ basis_bits.hpp 130 â âââ buffer.hpp 131 â âââ byte_pool.hpp 132 â âââ compare_strategy.hpp 133 â âââ gid.hpp 134 â âââ group_strms.hpp 135 â âââ hash_strategy.hpp 136 â âââ hash_strms.hpp 137 â âââ hash_table.hpp 138 â âââ Makefile 139 â âââ marker_strms.hpp 140 â âââ strategy_types.hpp 141 â âââ symbol_table.hpp 142 â âââ transpose.hpp 143 â 144 âââ test 145 â âââ gen_test_file.py 146 â âââ Makefile 147 â âââ pin 148 â â âââ (L_O_U)_1_1.test 149 â âââ pout 150 â â âââ (L_O_U)_1_1.test 151 â âââ run_tests.py 152 â âââ st_test_file_generator.py 153 â 154 âââ lib 155 âââ libtest 156 â âââ bitscantest.cpp 157 â âââ byte_compare_generator.cpp 158 â âââ byte_compare_template.cpp 159 â âââ hash_test.cpp 160 â âââ Makefile 161 â âââ pool_test.cpp 162 â 163 âââ transpose.hpp 164 -
trunk/symbol_table/src/Makefile
r2106 r2113 17 17 TEST_DST_DIR = ../test/pout 18 18 19 # Perf builds 19 ########################################################################### 20 # Perf Builds 21 ########################################################################### 20 22 id: basis_bits.hpp buffer.hpp byte_pool.hpp hash_strms.hpp hash_table.hpp ../lib/hash.hpp group_strms.hpp symbol_table.hpp main.cpp marker_strms.hpp symbol_table.hpp transpose.hpp 21 23 $(CC) -o id main.cpp $(AFLAGS) -DID_STRATEGY -DBUFFER_PROFILING #-DID_SYMBOL_TABLE_TEMPLATE_HPP_DEBUG #-DHASH_TABLE_HPP_DEBUG … … 33 35 $(CC) -o bit_byte main.cpp $(AFLAGS) -DBIT_BYTE_STRATEGY -DBUFFER_PROFILING #-DID_SYMBOL_TABLE_TEMPLATE_HPP_DEBUG #-DHASH_TABLE_HPP_DEBUG 34 36 35 36 # Diff Tests 37 ########################################################################### 38 # Diff Test Builds 39 ########################################################################### 37 40 id_diff_test: id 38 41 $(CC) -o id main.cpp $(AFLAGS) -DID_STRATEGY -DIDENTITY_TEST … … 60 63 diff -rq --exclude=".svn" $(TEST_SRC_DIR) $(TEST_DST_DIR) || exit 0 61 64 62 # GID Tests 65 ########################################################################### 66 # GID Test Builds 67 ########################################################################### 63 68 id_gid_test: 64 69 $(CC) -o id main.cpp $(AFLAGS) -DID_STRATEGY -DGID_TEST -
trunk/symbol_table/src/basis_bits.hpp
r1967 r2113 13 13 } Basis_bits; 14 14 15 16 15 #endif // BASIS_BITS_HPP -
trunk/symbol_table/src/buffer.hpp
r2095 r2113 9 9 #define LOOKBACK_BLOCKS 1 10 10 #define LOOKBACK_SIZE BLOCK_SIZE * LOOKBACK_BLOCKS 11 #define SEGMENT_BLOCKS 5000 // No COPY BACK for test, starts within segment 12 #define SEGMENT_SIZE BLOCK_SIZE * (SEGMENT_BLOCKS) // (bytes) a multiple of BLOCK_SIZE 13 #define SEGMENT_ALLOC_SIZE (LOOKBACK_SIZE + SEGMENT_SIZE + PADDING_SIZE) / sizeof(BitBlock) // (bytes) 11 #define SEGMENT_BLOCKS 5000 // Slurp file 12 #define SEGMENT_SIZE BLOCK_SIZE * (SEGMENT_BLOCKS) // A multiple of BLOCK_SIZE (n bytes) 13 #define SEGMENT_ALLOC_SIZE (LOOKBACK_SIZE + SEGMENT_SIZE + PADDING_SIZE) / sizeof(BitBlock) 14 14 15 15 #endif // BUFFER_HPP -
trunk/symbol_table/src/strategy_types.hpp
r2106 r2113 1 /* 2 * strategy_types.hpp 3 * Created on: 15-April-2012 4 * Author: Ken Herdy 5 * 6 */ 1 7 #ifndef STRATEGY_TYPES 2 8 #define STRATEGY_TYPES -
trunk/symbol_table/src/symbol_table.hpp
r2106 r2113 1 1 /* 2 * id_symbol_table.hpp2 * symbol_table.hpp 3 3 * Created on: 18-December-2011 4 4 * Author: Ken Herdy 5 5 * 6 * BitBlock type arguments must adhere to the 'full-block invariant' 7 * and mask partial block with null bytes. 6 * BLOCKSIZE - Register bit width. 7 * Block - BLOCKSIZE contiguous bytes. 8 * Segment - Array of blocks. 8 9 * 9 * Number of length groups must coincide with the 10 * number compiler generated length groups. 10 * Set of Parallel Data Segments i.e. data segments with values in 1-1. 11 * 12 * raw data buffer 13 * symbol starts markers 14 * set of length groups follows markers G1, G2, ... , Gk 15 * 16 * for each block B in the set of parallel data segments 17 * for each length group G in B 18 * for each symbol marker follows position p in G 19 * resolve symbol global identifier gid at p 20 * write symbol gid at position p in gid output array 11 21 * 12 22 */ 13 23 #ifndef ID_SYMBOL_TABLE_TEMPLATE_HPP 14 24 #define ID_SYMBOL_TABLE_TEMPLATE_HPP 15 16 25 17 26 #include "strategy_types.hpp" … … 39 48 40 49 /////////////////////////////////////////////////////////////////////////// 41 // Symbol Type - do_block() 50 // do_block - reverse bit scans fixed length symbols and resolve GIDs. 51 // 52 // Precondition: For each symbol follow marker there exists a corresponding 53 // symbol start marker. 42 54 /////////////////////////////////////////////////////////////////////////// 43 55 template<class GIDS, class HASH_TABLE> … … 49 61 GIDS & gids, GIDFactory & gid_factory, GIDData & gid_data); 50 62 63 /////////////////////////////////////////////////////////////////////////// 64 // do_block - reverse bit scans variable length symbols and resolve GIDs. 65 // 66 // Precondition: For each symbol follow marker there exists a corresponding 67 // symbol start marker. 
68 /////////////////////////////////////////////////////////////////////////// 51 69 template<class GIDS, class HASH_TABLE> 52 70 void do_block(uint32_t blk_offset, … … 58 76 59 77 /////////////////////////////////////////////////////////////////////////// 60 // Symbol Type - Array gids. 61 /////////////////////////////////////////////////////////////////////////// 62 78 // GID container - An array of gids. 79 /////////////////////////////////////////////////////////////////////////// 63 80 template<uint32_t SIZE> 64 81 class gid { … … 80 97 } 81 98 82 // Groups & groups83 99 void resolve(uint8_t buffer [], Groups groups [], BitBlock starts [], BitBlock follows_0 [], 84 100 BitBlock h0 [], BitBlock h1 [], uint32_t segment_blocks, GIDS & gids) { … … 90 106 } 91 107 92 // Groups & groups93 108 IDISA_ALWAYS_INLINE void resolve(uint32_t blk_offset, uint8_t buffer [], Groups & groups, BitBlock starts[], 94 109 BitBlock * h0, BitBlock * h1, GIDS & gids) { … … 220 235 221 236 /////////////////////////////////////////////////////////////////////////////// 222 // Byte Space Hash237 // Grouping strategy hash table members. 
223 238 /////////////////////////////////////////////////////////////////////////////// 224 239 #ifdef ID_STRATEGY … … 262 277 div2_hash_table<6, div2, byte, ALLOCATOR> hash_table_6; 263 278 div2_hash_table<8, div2, bit, ALLOCATOR> hash_table_8; 264 // id_hash_table<1, id, byte, ALLOCATOR> hash_table_1;265 // id_hash_table<2, id, byte, ALLOCATOR> hash_table_2;266 // id_hash_table<3, id, byte, ALLOCATOR> hash_table_3;267 // id_hash_table<4, id, byte, ALLOCATOR> hash_table_4;268 // id_hash_table<5, id, byte, ALLOCATOR> hash_table_5;269 // id_hash_table<6, id, byte, ALLOCATOR> hash_table_6;270 // id_hash_table<7, id, byte, ALLOCATOR> hash_table_7;271 // id_hash_table<8, id, bit, ALLOCATOR> hash_table_8;272 279 logbase2_hash_table<16, logbase2, bit, ALLOCATOR> hash_table_16; 273 280 id_hash_table<0, id, bit, ALLOCATOR> hash_table_0; … … 276 283 bit_byte_hash_table<0, bit_byte, bit, ALLOCATOR> hash_table_0; 277 284 #else 278 #error "Length group strategy not specified. #define {ID_STRATEGY,DIV2_STRATEGY,LOGBASE2_STRATEGY,DIV2_LOGBASE2_STRATEGY,BIT_BYTE}."285 #error "Length group strategy not specified. #define ID_STRATEGY|DIV2_STRATEGY|LOGBASE2_STRATEGY|DIV2_LOGBASE2_STRATEGY|BIT_BYTE_STRATEGY" 279 286 #endif 280 287 281 282 288 }; 283 289 284 /* NOTE: C++ template code and Pablo generated length groups must coincide. 
*/285 286 // Fixed Lengths - REVERSE SCAN LOGIC - Scan each BLOCK MSB to LSB287 290 template<class GIDS, class HASH_TABLE> 288 291 void do_block(uint32_t blk_offset, … … 336 339 } 337 340 338 339 // Variable Lengths, reverse scanner logic340 // Precondition: A symbol end is marked iff a symbol start is marked within a buffer segment.341 341 template<class SYMBOL, class HASH_TABLE> 342 342 void do_block(uint32_t blk_offset, … … 390 390 } 391 391 392 gid = h_table.lookup_or_insert(buffer_base, spos, lgth, h0_base, h1_base, gid_factory, gid_data); // WARNING: spos must be >= 0392 gid = h_table.lookup_or_insert(buffer_base, spos, lgth, h0_base, h1_base, gid_factory, gid_data); 393 393 394 394 #ifdef ID_SYMBOL_STORE_SYMBOL_GIDS_AT_END_POSITION … … 399 399 400 400 #ifdef ID_SYMBOL_TABLE_TEMPLATE_HPP_DEBUG 401 //print_symbol_debug(gid, buffer, spos, fpos, lgth);402 401 print_symbol_debug(gid, buffer_base, spos, fpos, gid_data.get_bytes_lgth(gid)); 403 402 #endif … … 409 408 410 409 #endif // ID_SYMBOL_TABLE_TEMPLATE_HPP 411 412 410 413 411 /* // Forward Scan -
trunk/symbol_table/src/transpose.hpp
r1967 r2113 6 6 #include "basis_bits.hpp" 7 7 8 /* s2p Definitions */ 9 8 static IDISA_ALWAYS_INLINE void s2p_do_block(BytePack U8[], Basis_bits & basis_bits) { 10 9 s2p(U8[0], U8[1], U8[2], U8[3], U8[4], U8[5], U8[6], U8[7],
Note: See TracChangeset
for help on using the changeset viewer.