1 | #include <vector> |
---|
2 | #include <limits.h> |
---|
3 | #include <algorithm> |
---|
4 | |
---|
5 | using namespace std; |
---|
6 | |
---|
// Strict "less than" ordering for two ints; passed to std::sort as the
// comparator when ordering the gap boundaries.
bool comparison(int i, int j)
{
    // Equal values are never ordered before one another.
    return j > i;
}
---|
8 | |
---|
// Collects per-symbol measurements (lengths, GIDs and gap boundaries) and
// prints summary statistics computed from them.
class SymbolStatGatherer
{
public:
    // --- recording -------------------------------------------------------
    // Record one symbol's length.
    void store_symbol_length(int L);
    // Record one symbol's GID.
    void store_symbol_gid(int GID);
    // Record one symbol's boundaries: symbol_start is stored as the end of
    // the gap before the symbol, symbol_end (when > -1) as the start of the
    // gap after it.
    void store_symbol_density(int symbol_start, int symbol_end);

    // --- reporting / lifecycle -------------------------------------------
    // Print the statistics gathered so far to standard output.
    void print_symbol_statistic();
    // Drop every stored measurement.
    void clear_all();

private:
    std::vector<int> symbol_lgth;     // values given to store_symbol_length()
    std::vector<int> density_starts;  // gap start positions (seeded with 0)
    std::vector<int> density_ends;    // gap end positions
    std::vector<int> gids;            // values given to store_symbol_gid()

    // Pair sorted gap starts with sorted gap ends and return end - start
    // for each pair.
    std::vector<int> compute_symbol_density(std::vector<int> density_starts, std::vector<int> density_ends);

    // Smallest / largest element of v; both yield 0 for an empty vector.
    int min(std::vector<int> v);
    int max(std::vector<int> v);
    // Sum of the elements of v, returned as a float.
    float total(std::vector<int> v);
    // Arithmetic mean of the elements of v.
    double average(std::vector<int> v);
};
---|
30 | |
---|
31 | void SymbolStatGatherer::clear_all() |
---|
32 | { |
---|
33 | symbol_lgth.clear(); |
---|
34 | density_starts.clear(); |
---|
35 | density_ends.clear(); |
---|
36 | gids.clear(); |
---|
37 | } |
---|
38 | |
---|
39 | void SymbolStatGatherer::store_symbol_density(int symbol_start, int symbol_end) |
---|
40 | { |
---|
41 | if (!density_starts.size()) |
---|
42 | { |
---|
43 | density_starts.push_back(0); |
---|
44 | } |
---|
45 | |
---|
46 | density_ends.push_back(symbol_start); |
---|
47 | |
---|
48 | if (symbol_end > -1) |
---|
49 | { |
---|
50 | density_starts.push_back(symbol_end); |
---|
51 | } |
---|
52 | } |
---|
53 | |
---|
54 | void SymbolStatGatherer::store_symbol_length(int L) |
---|
55 | { |
---|
56 | symbol_lgth.push_back(L); |
---|
57 | } |
---|
58 | |
---|
59 | void SymbolStatGatherer::store_symbol_gid(int GID) |
---|
60 | { |
---|
61 | gids.push_back(GID); |
---|
62 | } |
---|
63 | |
---|
64 | void SymbolStatGatherer::print_symbol_statistic() |
---|
65 | { |
---|
66 | // symbol counts |
---|
67 | printf ("Total symbols: %i\n", gids.size()); |
---|
68 | |
---|
69 | // symbol length |
---|
70 | printf ("Min length: %i \n", min(symbol_lgth)); |
---|
71 | printf ("Max length: %i \n", max(symbol_lgth)); |
---|
72 | printf ("Avg length: %f \n", average(symbol_lgth)); |
---|
73 | |
---|
74 | // frequency of unique symbols |
---|
75 | printf ("Total unique symbols: %i\n", max(gids)); |
---|
76 | |
---|
77 | // symbol density (symbol separation distance) |
---|
78 | vector<int> symbol_density = compute_symbol_density(density_starts, density_ends); |
---|
79 | // printf ("Avg symbol density: %f\n", average(symbol_density)); |
---|
80 | float total_lgth = total(symbol_lgth); |
---|
81 | float total_delimiters = total(symbol_density); |
---|
82 | printf ("Symbol density: %f\n", total_lgth/(total_lgth+total_delimiters)); |
---|
83 | } |
---|
84 | |
---|
85 | vector<int> SymbolStatGatherer::compute_symbol_density(vector<int> density_starts, vector<int> density_ends) |
---|
86 | { |
---|
87 | vector<int> symbol_density; |
---|
88 | int size = density_starts.size(); |
---|
89 | sort (density_starts.begin(), density_starts.end(), comparison); |
---|
90 | sort (density_ends.begin(), density_ends.end(), comparison); |
---|
91 | |
---|
92 | for(int i = 0; i < size; i++) |
---|
93 | { |
---|
94 | int start, end, density; |
---|
95 | start = density_starts[i]; |
---|
96 | end = density_ends[i]; |
---|
97 | density = end - start; |
---|
98 | |
---|
99 | symbol_density.push_back(density); |
---|
100 | } |
---|
101 | return symbol_density; |
---|
102 | } |
---|
103 | |
---|
104 | int SymbolStatGatherer::min(vector<int> v) |
---|
105 | { |
---|
106 | int result = INT_MAX; |
---|
107 | for (vector<int>::iterator it = v.begin(); it < v.end(); it++) |
---|
108 | { |
---|
109 | int number = *it; |
---|
110 | |
---|
111 | if (number < result) |
---|
112 | { |
---|
113 | result = number; |
---|
114 | } |
---|
115 | } |
---|
116 | |
---|
117 | if (!v.size()) |
---|
118 | { |
---|
119 | return 0; |
---|
120 | } |
---|
121 | return result; |
---|
122 | } |
---|
123 | |
---|
124 | float SymbolStatGatherer::total(vector<int> v) |
---|
125 | { |
---|
126 | float result = 0; |
---|
127 | for (vector<int>::iterator it = v.begin(); it < v.end(); it++) |
---|
128 | { |
---|
129 | int number = *it; |
---|
130 | result += number; |
---|
131 | } |
---|
132 | return result; |
---|
133 | } |
---|
134 | |
---|
135 | int SymbolStatGatherer::max(vector<int> v) |
---|
136 | { |
---|
137 | int result = INT_MIN; |
---|
138 | for (vector<int>::iterator it = v.begin(); it < v.end(); it++) |
---|
139 | { |
---|
140 | int number = *it; |
---|
141 | |
---|
142 | if (number > result) |
---|
143 | { |
---|
144 | result = number; |
---|
145 | } |
---|
146 | } |
---|
147 | |
---|
148 | if (!v.size()) |
---|
149 | { |
---|
150 | return 0; |
---|
151 | } |
---|
152 | return result; |
---|
153 | } |
---|
154 | |
---|
155 | double SymbolStatGatherer::average(vector<int> v) |
---|
156 | { |
---|
157 | double result = 0.0; |
---|
158 | for (vector<int>::iterator it = v.begin(); it < v.end(); it++) |
---|
159 | { |
---|
160 | int number = *it; |
---|
161 | |
---|
162 | result += number; |
---|
163 | } |
---|
164 | return result/v.size(); |
---|
165 | } |
---|