source: proto/SymbolTable/wcd_common_functions.h @ 4204

Last change on this file since 4204 was 1793, checked in by vla24, 8 years ago

Added some text files for wdc. updated performance test.

File size: 4.6 KB
Line 
1#ifndef WCD_COMMON_FUNCTIONS_H
2#define WCD_COMMON_FUNCTIONS_H
3
4#include <limits.h>
5
6using namespace std;
7
// Declaration of the processing routine invoked by populateDictionary().
// Only the prototype is given here; the definition is not part of this header.
// The template parameter list depends on whether USE_LS_SYMBOL_TABLE is
// defined: that build takes an additional finalize_gids flag.
// NOTE(review): what the boolean flags switch on is not visible from this
// header -- confirm against the definition of do_process.
#ifndef USE_LS_SYMBOL_TABLE
template <bool allow_performance_check>
extern void do_process(FILE* infile, FILE* outfile);
#else
template <bool allow_performance_check, bool finalize_gids>
extern void do_process(FILE* infile, FILE* outfile);
#endif
13
// Forward declarations of the helpers defined later in this header.
//
// Every prototype must match its definition exactly.  In particular,
// wordCountInDictionary reports its two results through reference
// out-parameters, so the prototype carries all four parameters; a two-argument
// prototype would declare a different overload that is never defined.
//
// All helpers are declared inline because their definitions live in this
// header and may therefore be seen by more than one translation unit.
inline int maximumIntegerInVector(std::vector<int> arr);
inline void wordCountInDictionary(const int greatestGIDInDictionary, std::vector<int> gids,
                                  int& totalUnknownWordsInDictionary, int& totalKnownWordsInDictionary);
inline int findTotalKnownWordsInDictionary(const int greatestGIDInDictionary, std::vector<int> gids);
inline void printWordCountInDictionary(const int totalUnknownWordsInDictionary, const int totalKnownWordsInDictionary);
inline void getFilenames(const int argc, char** argv,
                         char *& dictionaryfilename, char *& infilename, char *& outfilename);
inline void openInputOutputFiles(const char * dictionaryfilename, const char * infilename, const char * outfilename,
                                 FILE *& dictionaryfile, FILE *& infile, FILE *& outfile);
inline void populateDictionary(FILE * dictionaryfile, FILE * outfile, std::vector<int>& gids, int& greatest_GID_in_dictionary);
23
// Returns the largest value stored in arr.
// An empty vector yields 0 rather than INT_MIN.
inline int maximumIntegerInVector(std::vector<int> arr)
{
    if (arr.empty())
    {
        return 0;
    }

    int largest = INT_MIN;
    for (std::vector<int>::size_type idx = 0; idx != arr.size(); ++idx)
    {
        const int candidate = arr[idx];
        if (candidate > largest)
        {
            largest = candidate;
        }
    }
    return largest;
}
41
42inline void wordCountInDictionary(const int greatestGIDInDictionary, vector<int> gids,
43                                  int& totalUnknownWordsInDictionary, int& totalKnownWordsInDictionary)
44{
45    int greatestGIDInText = maximumIntegerInVector(gids);
46    totalUnknownWordsInDictionary = greatestGIDInText - greatestGIDInDictionary;
47    totalKnownWordsInDictionary = findTotalKnownWordsInDictionary(greatestGIDInDictionary, gids);
48}
49
// Prints the known/unknown word counts to stdout.
// Output is produced only when PRINT_DICTIONARY_INFO evaluates to non-zero at
// preprocessing time; otherwise the function does nothing.
//
// Declared inline because the definition lives in a header: without it every
// translation unit including this file would emit its own external definition
// and the program would fail to link.
inline void printWordCountInDictionary(const int totalUnknownWordsInDictionary, const int totalKnownWordsInDictionary)
{
#if PRINT_DICTIONARY_INFO
    printf ("%i known words, %i unknown words\n",totalKnownWordsInDictionary, totalUnknownWordsInDictionary);
#else
    // Printing is compiled out; mark the parameters as intentionally unused.
    (void) totalUnknownWordsInDictionary;
    (void) totalKnownWordsInDictionary;
#endif
}
56
// Counts how many distinct GIDs in the range [0, greatestGIDInDictionary]
// occur in gids.  Duplicates are counted once; GIDs outside that range
// (negative or above the dictionary maximum) are ignored.
//
// Returns 0 when greatestGIDInDictionary is negative, since the range is then
// empty.
inline int findTotalKnownWordsInDictionary(const int greatestGIDInDictionary, std::vector<int> gids)
{
    // Guard first: a negative bound would otherwise produce a bogus table size
    // and an out-of-range scan.
    if (greatestGIDInDictionary < 0)
    {
        return 0;
    }

    // One flag per possible dictionary GID.  The size is computed in the
    // unsigned size_type so that greatestGIDInDictionary == INT_MAX cannot
    // overflow the "+ 1".  The vector releases its storage automatically.
    typedef std::vector<char>::size_type index_type;
    std::vector<char> seen(static_cast<index_type>(greatestGIDInDictionary) + 1, 0);

    int knownWords = 0;
    for (std::vector<int>::const_iterator it = gids.begin(); it != gids.end(); ++it)
    {
        const int GID = *it;
        if (GID < 0 || GID > greatestGIDInDictionary)
        {
            continue;   // not a dictionary GID (and not a valid table index)
        }

        // Count each dictionary GID the first time it is encountered.
        if (!seen[static_cast<index_type>(GID)])
        {
            seen[static_cast<index_type>(GID)] = 1;
            ++knownWords;
        }
    }

    return knownWords;
}
85
// Extracts the file names from the command line.
//
//   argv[1] -> dictionaryfilename
//   argv[2] -> infilename
//   argv[3] -> outfilename (NULL when no third argument was supplied)
//
// When fewer than two file arguments are present a usage message is printed
// and the process terminates with exit(-1).
inline void getFilenames(const int argc, char** argv,
                         char *& dictionaryfilename, char *& infilename, char *& outfilename)
{
    const bool haveRequiredArguments = (argc >= 3);
    if (!haveRequiredArguments)
    {
        printf("Usage: %s <dictionaryfile> <textfile> [<outputfile>]\n", argv[0]);
        exit(-1);
    }

    dictionaryfilename = argv[1];
    infilename = argv[2];

    // The output file is optional.
    outfilename = NULL;
    if (argc >= 4)
    {
        outfilename = argv[3];
    }
}
101
// Opens the dictionary, input and output streams.
//
//   dictionaryfilename  opened for binary reading into dictionaryfile
//   infilename          opened for binary reading into infile
//   outfilename         opened for binary writing into outfile; when NULL,
//                       outfile is set to stdout instead
//
// If any file cannot be opened, an error message is written to stderr and the
// process terminates with exit(-1).
//
// The stat() calls that used to precede each fopen() were removed: their
// result was never inspected, so they had no effect on the outcome.
inline void openInputOutputFiles(const char * dictionaryfilename, const char * infilename, const char * outfilename,
                                 FILE *& dictionaryfile, FILE *& infile, FILE *& outfile)
{
    dictionaryfile = fopen(dictionaryfilename, "rb");
    if (!dictionaryfile) {
        fprintf(stderr, "Error: cannot open %s for input.\n", dictionaryfilename);
        exit(-1);
    }

    infile = fopen(infilename, "rb");
    if (!infile) {
        fprintf(stderr, "Error: cannot open %s for input.\n", infilename);
        exit(-1);
    }

    // No output file name means "write to standard output".
    if (!outfilename) {
        outfile = stdout;
        return;
    }

    outfile = fopen(outfilename, "wb");
    if (!outfile) {
        fprintf(stderr, "Error: cannot open %s for writing.\n", outfilename);
        exit(-1);
    }
}
130
131inline void populateDictionary(FILE * dictionaryfile, FILE * outfile, vector<int>& gids, int& greatest_GID_in_dictionary)
132{
133    // populate dictionary
134#ifdef USE_LS_SYMBOL_TABLE
135    do_process<false, true>(dictionaryfile, outfile);
136#else
137    do_process<false>(dictionaryfile, outfile);
138#endif
139    greatest_GID_in_dictionary = maximumIntegerInVector(gids);
140    gids.clear();
141}
142
143#endif // WCD_COMMON_FUNCTIONS_H
Note: See TracBrowser for help on using the repository browser.