source: proto/SymbolTable/symbol_stat_gatherer.h @ 3427

Last change on this file since 3427 was 1793, checked in by vla24, 8 years ago

Added some text files for wdc. updated performance test.

File size: 3.7 KB
Line 
#include <limits.h>

#include <algorithm>
#include <cstdio>
#include <vector>
4
5using namespace std;
6
// Ordering predicate for std::sort: returns true when i must come before j,
// i.e. ascending order.
// Declared inline because the definition lives in a header; a non-inline
// definition would be emitted by every translation unit that includes this
// file and fail at link time with a multiple-definition error.
inline bool comparison (int i,int j) { return (i<j); }
8
// Accumulates per-symbol measurements and prints summary statistics for them.
//
// Values are fed in one at a time through the store_* methods and are only
// appended to internal lists; the statistics themselves are computed when
// print_symbol_statistic() is called.
class SymbolStatGatherer
{
public:
    // Appends L to the list of recorded symbol lengths.
    void store_symbol_length(int L);

    // Appends GID to the list of recorded GIDs.
    void store_symbol_gid(int GID);

    // Records the boundaries of one symbol for the density computation:
    // symbol_start is appended to the gap-end list, and symbol_end is
    // appended to the gap-start list when it is greater than -1.
    void store_symbol_density(int symbol_start, int symbol_end);

    // Prints the symbol count, the min/max/average symbol length, the largest
    // recorded GID and the symbol density using printf.
    void print_symbol_statistic();

    // Empties all four recorded lists.
    void clear_all();

private:
    std::vector<int> symbol_lgth;      // values passed to store_symbol_length()
    std::vector<int> density_starts;   // positions at which a gap between symbols begins
    std::vector<int> density_ends;     // positions at which a gap between symbols ends
    std::vector<int> gids;             // values passed to store_symbol_gid()

    // Sorts both lists ascending and returns the element-wise differences
    // (end - start).
    std::vector<int> compute_symbol_density(std::vector<int> density_starts, std::vector<int> density_ends);

    // Smallest element of v, or 0 when v is empty.
    int min(std::vector<int> v);

    // Largest element of v, or 0 when v is empty.
    int max(std::vector<int> v);

    // Sum of the elements of v, as a float.
    float total(std::vector<int> v);

    // Sum of the elements of v divided by the number of elements.
    double average(std::vector<int> v);
};
30
31void SymbolStatGatherer::clear_all()
32{
33    symbol_lgth.clear();
34    density_starts.clear();
35    density_ends.clear();
36    gids.clear();
37}
38
39void SymbolStatGatherer::store_symbol_density(int symbol_start, int symbol_end)
40{
41    if (!density_starts.size())
42    {
43        density_starts.push_back(0);
44    }
45
46    density_ends.push_back(symbol_start);
47
48    if (symbol_end > -1)
49    {
50        density_starts.push_back(symbol_end);
51    }
52}
53
54void SymbolStatGatherer::store_symbol_length(int L)
55{
56    symbol_lgth.push_back(L);
57}
58
59void SymbolStatGatherer::store_symbol_gid(int GID)
60{
61    gids.push_back(GID);
62}
63
64void SymbolStatGatherer::print_symbol_statistic()
65{
66//    symbol counts
67    printf ("Total symbols: %i\n", gids.size());
68
69//    symbol length
70    printf ("Min length: %i \n", min(symbol_lgth));
71    printf ("Max length: %i \n", max(symbol_lgth));
72    printf ("Avg length: %f \n", average(symbol_lgth));
73
74//    frequency of unique symbols
75    printf ("Total unique symbols: %i\n", max(gids));
76
77//    symbol density (symbol separation distance)
78    vector<int> symbol_density = compute_symbol_density(density_starts, density_ends);
79//    printf ("Avg symbol density: %f\n", average(symbol_density));
80    float total_lgth = total(symbol_lgth);
81    float total_delimiters = total(symbol_density);
82    printf ("Symbol density: %f\n", total_lgth/(total_lgth+total_delimiters));
83}
84
85vector<int> SymbolStatGatherer::compute_symbol_density(vector<int> density_starts, vector<int> density_ends)
86{
87    vector<int> symbol_density;
88    int size = density_starts.size();
89    sort (density_starts.begin(), density_starts.end(), comparison);
90    sort (density_ends.begin(), density_ends.end(), comparison);
91
92    for(int i = 0; i < size; i++)
93    {
94        int start, end, density;
95        start = density_starts[i];
96        end = density_ends[i];
97        density = end - start;
98
99        symbol_density.push_back(density);
100    }
101    return symbol_density;
102}
103
104int SymbolStatGatherer::min(vector<int> v)
105{
106    int result = INT_MAX;
107    for (vector<int>::iterator it = v.begin(); it < v.end(); it++)
108    {
109        int number = *it;
110
111        if (number < result)
112        {
113            result = number;
114        }
115    }
116
117    if (!v.size())
118    {
119        return 0;
120    }
121    return result;
122}
123
124float SymbolStatGatherer::total(vector<int> v)
125{
126    float result = 0;
127    for (vector<int>::iterator it = v.begin(); it < v.end(); it++)
128    {
129        int number = *it;
130        result += number;
131    }
132    return result;
133}
134
135int SymbolStatGatherer::max(vector<int> v)
136{
137    int result = INT_MIN;
138    for (vector<int>::iterator it = v.begin(); it < v.end(); it++)
139    {
140        int number = *it;
141
142        if (number > result)
143        {
144            result = number;
145        }
146    }
147
148    if (!v.size())
149    {
150        return 0;
151    }
152    return result;
153}
154
155double SymbolStatGatherer::average(vector<int> v)
156{
157    double result = 0.0;
158    for (vector<int>::iterator it = v.begin(); it < v.end(); it++)
159    {
160        int number = *it;
161
162        result += number;
163    }
164    return result/v.size();
165}
Note: See TracBrowser for help on using the repository browser.