#include <limits.h>

#include <algorithm>
#include <cstdio>
#include <vector>
4 | |
5 | using namespace std; |
6 | |
// Ordering predicate for ascending integer sorts: true exactly when i
// comes strictly before j.
bool comparison(int i, int j)
{
    const bool i_precedes_j = (j > i);
    return i_precedes_j;
}
8 | |
// Collects per-symbol samples (lengths, ids and the offsets around each
// symbol) and prints summary statistics computed from them.
class SymbolStatGatherer
{
public:
    // --- recording -------------------------------------------------------

    // Appends L to the list of recorded symbol lengths.
    void store_symbol_length(int L);

    // Appends GID to the list of recorded symbol ids.
    void store_symbol_gid(int GID);

    // Records the offsets around one symbol: symbol_start is appended to the
    // end offsets, and symbol_end is appended to the start offsets when it
    // is greater than -1. The first call also seeds the start offsets with 0.
    void store_symbol_density(int symbol_start, int symbol_end);

    // --- reporting / lifecycle -------------------------------------------

    // Prints the gathered statistics with printf.
    void print_symbol_statistic();

    // Empties all four sample lists.
    void clear_all();

private:
    vector<int> symbol_lgth;     // recorded symbol lengths
    vector<int> density_starts;  // start offsets used for the separation distances
    vector<int> density_ends;    // end offsets used for the separation distances
    vector<int> gids;            // recorded symbol ids

    // Sorts both offset lists and returns the element-wise differences
    // density_ends[i] - density_starts[i].
    vector<int> compute_symbol_density(vector<int> density_starts, vector<int> density_ends);

    // Smallest element of v; 0 when v is empty.
    int min(vector<int> v);

    // Largest element of v; 0 when v is empty.
    int max(vector<int> v);

    // Sum of the elements of v, returned as a float.
    float total(vector<int> v);

    // Arithmetic mean of the elements of v.
    double average(vector<int> v);
};
30 | |
31 | void SymbolStatGatherer::clear_all() |
32 | { |
33 | symbol_lgth.clear(); |
34 | density_starts.clear(); |
35 | density_ends.clear(); |
36 | gids.clear(); |
37 | } |
38 | |
39 | void SymbolStatGatherer::store_symbol_density(int symbol_start, int symbol_end) |
40 | { |
41 | if (!density_starts.size()) |
42 | { |
43 | density_starts.push_back(0); |
44 | } |
45 | |
46 | density_ends.push_back(symbol_start); |
47 | |
48 | if (symbol_end > -1) |
49 | { |
50 | density_starts.push_back(symbol_end); |
51 | } |
52 | } |
53 | |
54 | void SymbolStatGatherer::store_symbol_length(int L) |
55 | { |
56 | symbol_lgth.push_back(L); |
57 | } |
58 | |
59 | void SymbolStatGatherer::store_symbol_gid(int GID) |
60 | { |
61 | gids.push_back(GID); |
62 | } |
63 | |
64 | void SymbolStatGatherer::print_symbol_statistic() |
65 | { |
66 | // symbol counts |
67 | printf ("Total symbols: %i\n", gids.size()); |
68 | |
69 | // symbol length |
70 | printf ("Min length: %i \n", min(symbol_lgth)); |
71 | printf ("Max length: %i \n", max(symbol_lgth)); |
72 | printf ("Avg length: %f \n", average(symbol_lgth)); |
73 | |
74 | // frequency of unique symbols |
75 | printf ("Total unique symbols: %i\n", max(gids)); |
76 | |
77 | // symbol density (symbol separation distance) |
78 | vector<int> symbol_density = compute_symbol_density(density_starts, density_ends); |
79 | // printf ("Avg symbol density: %f\n", average(symbol_density)); |
80 | float total_lgth = total(symbol_lgth); |
81 | float total_delimiters = total(symbol_density); |
82 | printf ("Symbol density: %f\n", total_lgth/(total_lgth+total_delimiters)); |
83 | } |
84 | |
85 | vector<int> SymbolStatGatherer::compute_symbol_density(vector<int> density_starts, vector<int> density_ends) |
86 | { |
87 | vector<int> symbol_density; |
88 | int size = density_starts.size(); |
89 | sort (density_starts.begin(), density_starts.end(), comparison); |
90 | sort (density_ends.begin(), density_ends.end(), comparison); |
91 | |
92 | for(int i = 0; i < size; i++) |
93 | { |
94 | int start, end, density; |
95 | start = density_starts[i]; |
96 | end = density_ends[i]; |
97 | density = end - start; |
98 | |
99 | symbol_density.push_back(density); |
100 | } |
101 | return symbol_density; |
102 | } |
103 | |
104 | int SymbolStatGatherer::min(vector<int> v) |
105 | { |
106 | int result = INT_MAX; |
107 | for (vector<int>::iterator it = v.begin(); it < v.end(); it++) |
108 | { |
109 | int number = *it; |
110 | |
111 | if (number < result) |
112 | { |
113 | result = number; |
114 | } |
115 | } |
116 | |
117 | if (!v.size()) |
118 | { |
119 | return 0; |
120 | } |
121 | return result; |
122 | } |
123 | |
124 | float SymbolStatGatherer::total(vector<int> v) |
125 | { |
126 | float result = 0; |
127 | for (vector<int>::iterator it = v.begin(); it < v.end(); it++) |
128 | { |
129 | int number = *it; |
130 | result += number; |
131 | } |
132 | return result; |
133 | } |
134 | |
135 | int SymbolStatGatherer::max(vector<int> v) |
136 | { |
137 | int result = INT_MIN; |
138 | for (vector<int>::iterator it = v.begin(); it < v.end(); it++) |
139 | { |
140 | int number = *it; |
141 | |
142 | if (number > result) |
143 | { |
144 | result = number; |
145 | } |
146 | } |
147 | |
148 | if (!v.size()) |
149 | { |
150 | return 0; |
151 | } |
152 | return result; |
153 | } |
154 | |
155 | double SymbolStatGatherer::average(vector<int> v) |
156 | { |
157 | double result = 0.0; |
158 | for (vector<int>::iterator it = v.begin(); it < v.end(); it++) |
159 | { |
160 | int number = *it; |
161 | |
162 | result += number; |
163 | } |
164 | return result/v.size(); |
165 | } |
