Changeset 1769


Ignore:
Timestamp:
Dec 12, 2011, 10:14:38 PM (7 years ago)
Author:
vla24
Message:

Added dictionary symbol statistic gatherer

Location:
proto/SymbolTable
Files:
2 added
3 edited

Legend:

Unmodified
Added
Removed
  • proto/SymbolTable/Makefile

    r1766 r1769  
    3737PBGS_SYMBOLTABLE_LOG_TEMPLATE=symtab_pbgs_log_template.cpp
    3838PBGS_SYMBOLTABLE_DIV_TEMPLATE=symtab_pbgs_div_template.cpp
    39 SYMBOLTABLE_STAT_TEMPLATE=symtab_symbol_stat_gather.cpp
     39
     40XMLWF_STAT_TEMPLATE=symtab_symbol_stat_gather.cpp
     41WCD_STAT_TEMPLATE=wcd_symbol_stat_gather.cpp
    4042
    4143PABLO_FLAGS=#-a
     
    7476        python $(PABLO_COMPILER) $(PABLO_WCD_PBGS_DIV) -t $(WCD_PBGS_DIV_TEMPLATE) -o $(WCD_OUTFILE)
    7577
     78wcd_stat:       $(PABLO_WCD_PBGS_ID) # Parallel bitstream based group sorting
     79        python $(PABLO_COMPILER) $(PABLO_WCD_PBGS_ID) -t $(PARSER_COMMON_FUNCTIONS) -o $(PARSER_COMMON_FUNCTIONS_OUTFILE)
     80        python $(PABLO_COMPILER) $(PABLO_WCD_PBGS_ID) -t $(WCD_STAT_TEMPLATE) -o $(WCD_OUTFILE)
     81
    7682symtab_stl:     $(PABLO_SRCFILE)
    7783        python $(PABLO_COMPILER) $(PABLO_SRCFILE) -t $(PARSER_COMMON_FUNCTIONS) -o $(PARSER_COMMON_FUNCTIONS_OUTFILE)
     
    108114symtab_stat:    $(PABLO_SYMTAB_PBS) # Parallel bitstream based group sorting
    109115        python $(PABLO_COMPILER) $(PABLO_SYMTAB_PBS) -t $(PARSER_COMMON_FUNCTIONS) -o $(PARSER_COMMON_FUNCTIONS_OUTFILE)
    110         python $(PABLO_COMPILER) $(PABLO_FLAGS) $(PABLO_SYMTAB_PBS) -t $(SYMBOLTABLE_STAT_TEMPLATE) -o $(XMLWF_OUTFILE)
     116        python $(PABLO_COMPILER) $(PABLO_FLAGS) $(PABLO_SYMTAB_PBS) -t $(XMLWF_STAT_TEMPLATE) -o $(XMLWF_OUTFILE)
    111117
    112118pablo_help:
  • proto/SymbolTable/build_xmlwf.sh

    r1765 r1769  
    1 make symtab_stat
    2 cd src
    3 make all
    4 ./xmlwf ../test/test_files/soap.xml
    5 cd ..
     1cd src && make all && ./xmlwf ../test/test_files/soap.xml && cd ..
    62
  • proto/SymbolTable/symtab_symbol_stat_gather.cpp

    r1766 r1769  
    44#include "../symtab_common_functions.h"
    55#include "../xmlwf_common_functions.h"
    6 #include <limits.h>
     6
     7#include "../symbol_stat_gatherer.h"
    78
    89//#define STREAMSCAN
     
    3031int previous_block_last_elem_start;
    3132BytePack hashvalues[2];
    32 
    33 vector <int> gids;
    34 vector <int> symbol_lgth;
    35 vector <int> density_starts;
    36 vector <int> density_ends;
    3733PBGSIdentitySymbolTable pbgs_symbol_table;
    38 
     34SymbolStatGatherer symbol_stat;
    3935
    4036#ifdef STREAMSCAN
     
    5753template <int L> static inline int ElemEnd_grouping(int pos);
    5854
    59 void symbol_statistic_gathering();
    60 void store_symbol_density(int symbol_start, int symbol_end);
    61 vector<int> compute_symbol_density(vector<int> density_starts, vector<int> density_ends);
    6255
    6356int main(int argc, char * argv[]) {
     
    8174    PERF_SEC_DESTROY(parser_timer);
    8275
    83     symbol_statistic_gathering();
     76    symbol_stat.print_symbol_statistic();
    8477
    8578#if PRINT_SYMBOL_DISTRIBUTION
     
    9992    int hashvalue = compute_hash_value(L, start - block_base, hashvalues);
    10093    int gid = pbgs_symbol_table.Lookup_or_Insert_Name<L>(source + start, hashvalue);
    101     gids.push_back(gid);
    102     symbol_lgth.push_back(L);
    103     store_symbol_density(start, end);
     94    symbol_stat.store_symbol_gid(gid);
     95    symbol_stat.store_symbol_length(L);
     96    symbol_stat.store_symbol_density(start, end);
    10497#if DEBUG
    10598    char* symbol = new char[L+1];
     
    126119    {
    127120        gid = pbgs_symbol_table.Lookup_or_Insert_Name(source + start, hashvalue, lgth);
    128         symbol_lgth.push_back(lgth);
    129         store_symbol_density(start, end);
    130     }
    131     gids.push_back(gid);
     121        symbol_stat.store_symbol_length(lgth);
     122        symbol_stat.store_symbol_density(start, end);
     123    }
     124    symbol_stat.store_symbol_gid(gid);
    132125#if DEBUG
    133126    char* symbol = new char[lgth+1];
     
    150143        start++;
    151144    }
    152 }
    153 
    154 int min(vector<int> v)
    155 {
    156     int result = INT_MAX;
    157     for (vector<int>::iterator it = v.begin(); it < v.end(); it++)
    158     {
    159         int number = *it;
    160 
    161         if (number < result)
    162         {
    163             result = number;
    164         }
    165     }
    166     return result;
    167 }
    168 
    169 int max(vector<int> v)
    170 {
    171     int result = INT_MIN;
    172     for (vector<int>::iterator it = v.begin(); it < v.end(); it++)
    173     {
    174         int number = *it;
    175 
    176         if (number > result)
    177         {
    178             result = number;
    179         }
    180     }
    181     return result;
    182 }
    183 
    184 double average(vector<int> v)
    185 {
    186     double result = 0.0;
    187     for (vector<int>::iterator it = v.begin(); it < v.end(); it++)
    188     {
    189         int number = *it;
    190 
    191         result += number;
    192     }
    193     return result/v.size();
    194 }
    195 
    196 vector<int> compute_symbol_density(vector<int> density_starts, vector<int> density_ends)
    197 {
    198     vector<int> symbol_density;
    199     int size = density_starts.size();
    200 
    201     for(int i = 0; i < size; i++)
    202     {
    203         int start, end, density;
    204         start = density_starts[i];
    205         end = density_ends[i];
    206         density = end - start;
    207 
    208         symbol_density.push_back(density);
    209     }
    210     return symbol_density;
    211 }
    212 
    213 void store_symbol_density(int symbol_start, int symbol_end)
    214 {
    215     if (!density_starts.size())
    216     {
    217         density_starts.push_back(0);
    218     }
    219 
    220     density_ends.push_back(symbol_start);
    221 
    222     if (symbol_end > -1)
    223     {
    224         density_starts.push_back(symbol_end);
    225     }
    226 }
    227 
    228 void symbol_statistic_gathering()
    229 {
    230 //    symbol counts
    231     printf ("Total symbols: %i\n", gids.size());
    232 
    233 //    symbol length
    234     printf ("Min length: %i \n", min(symbol_lgth));
    235     printf ("Max length: %i \n", max(symbol_lgth));
    236     printf ("Avg length: %f \n", average(symbol_lgth));
    237 
    238 //    frequency of unique symbols
    239     printf ("Total unique symbols: %i\n", max(gids));
    240 
    241 //    symbol density (symbol separation distance)
    242     vector<int> symbol_density = compute_symbol_density(density_starts, density_ends);
    243     printf ("Avg symbol density: %f\n", average(symbol_density));
    244145}
    245146
     
    663564    buffer_base = buf_pos;
    664565
    665     store_symbol_density(buffer_base, -1);
     566    symbol_stat.store_symbol_density(buffer_base, -1);
    666567
    667568    matcher.StreamScan(chars_avail);
Note: See TracChangeset for help on using the changeset viewer.