source: proto/SymbolTable/wcd_common_functions.h @ 1766

Last change on this file since 1766 was 1722, checked in by vla24, 8 years ago

SymbolTable: added a Python script to convert the content of all input files (dictionary and text) to lowercase

File size: 4.6 KB
Line 
1#ifndef WCD_COMMON_FUNCTIONS_H
2#define WCD_COMMON_FUNCTIONS_H
3
#include <limits.h>
#include <stdio.h>
#include <stdlib.h>
#include <sys/stat.h>

#include <vector>
5
6using namespace std;
7
// do_process is declared here but not defined in this header.
// With USE_LS_SYMBOL_TABLE defined it takes a second boolean template
// argument (finalize_gids); otherwise only allow_performance_check.
#ifdef USE_LS_SYMBOL_TABLE
template<bool allow_performance_check, bool finalize_gids>
extern void do_process(FILE *infile, FILE *outfile);
#else
template<bool allow_performance_check>
extern void do_process(FILE *infile, FILE *outfile);
#endif
13
// Forward declarations of the helpers defined further down in this header.
// Every signature must match its definition exactly: a mismatched parameter
// list silently declares a separate overload that is never defined.
// All helpers are inline because their definitions live in this header.
inline int maximumIntegerInVector(std::vector<int> arr);
inline void wordCountInDictionary(const int greatestGIDInDictionary, std::vector<int> gids,
                                  int& totalUnknownWordsInDictionary, int& totalKnownWordsInDictionary);
inline int findTotalKnownWordsInDictionary(const int greatestGIDInDictionary, std::vector<int> gids);
inline void printWordCountInDictionary(const int totalUnknownWordsInDictionary, const int totalKnownWordsInDictionary);
inline void getFilenames(const int argc, char** argv,
                         char *& dictionaryfilename, char *& infilename, char *& outfilename);
inline void openInputOutputFiles(const char * dictionaryfilename, const char * infilename, const char * outfilename,
                                 FILE *& dictionaryfile, FILE *& infile, FILE *& outfile);
inline void populateDictionary(FILE * dictionaryfile, FILE * outfile, std::vector<int>& gids, int& greatest_GID_in_dictionary);
23
// Returns the largest element of arr, or INT_MIN when arr is empty.
inline int maximumIntegerInVector(std::vector<int> arr)
{
    int largest = INT_MIN;
    for (std::vector<int>::size_type idx = 0; idx < arr.size(); ++idx)
    {
        const int candidate = arr[idx];
        if (candidate > largest)
        {
            largest = candidate;
        }
    }
    return largest;
}
36
37inline void wordCountInDictionary(const int greatestGIDInDictionary, vector<int> gids,
38                                  int& totalUnknownWordsInDictionary, int& totalKnownWordsInDictionary)
39{
40    int greatestGIDInText = maximumIntegerInVector(gids);
41    totalUnknownWordsInDictionary = greatestGIDInText - greatestGIDInDictionary;
42    totalKnownWordsInDictionary = findTotalKnownWordsInDictionary(greatestGIDInDictionary, gids);
43}
44
// Prints the known/unknown word totals to stdout when the build defines
// PRINT_DICTIONARY_INFO to a non-zero value; otherwise does nothing.
//
// Marked inline because the definition lives in a header: without it, every
// translation unit that includes this file emits its own external definition
// and the link fails with a multiple-definition error.
inline void printWordCountInDictionary(const int totalUnknownWordsInDictionary, const int totalKnownWordsInDictionary)
{
#if PRINT_DICTIONARY_INFO
    printf("%i known words, %i unknown words\n", totalKnownWordsInDictionary, totalUnknownWordsInDictionary);
#else
    // Reporting is compiled out; reference the parameters so the build stays
    // free of unused-parameter warnings.
    (void) totalUnknownWordsInDictionary;
    (void) totalKnownWordsInDictionary;
#endif
}
51
// Counts the distinct values in gids that lie in [0, greatestGIDInDictionary].
// Those are the GIDs that were already assigned while reading the dictionary,
// i.e. the "known" words.
//
// Returns 0 when greatestGIDInDictionary is negative (for example INT_MIN,
// which maximumIntegerInVector yields for an empty vector). Negative GIDs in
// gids are ignored rather than used as array indices.
inline int findTotalKnownWordsInDictionary(const int greatestGIDInDictionary, std::vector<int> gids)
{
    // A negative upper bound means no GID can qualify; bail out before
    // computing a table size from it.
    if (greatestGIDInDictionary < 0)
    {
        return 0;
    }

    // One flag per possible GID, zero-initialised. The size is computed in the
    // unsigned size type so that greatestGIDInDictionary == INT_MAX cannot
    // overflow the "+ 1". std::vector releases the storage on every exit path.
    typedef std::vector<char>::size_type size_type;
    std::vector<char> seen(static_cast<size_type>(greatestGIDInDictionary) + 1);

    // Count each in-range GID the first time it is encountered.
    int knownWords = 0;
    for (std::vector<int>::const_iterator it = gids.begin(); it != gids.end(); ++it)
    {
        const int GID = *it;
        if (GID < 0 || GID > greatestGIDInDictionary)
        {
            continue;
        }
        if (!seen[GID])
        {
            seen[GID] = 1;
            ++knownWords;
        }
    }

    return knownWords;
}
80
// Extracts the file names from the command line:
//   argv[1] -> dictionaryfilename, argv[2] -> infilename,
//   argv[3] -> outfilename (NULL when no third argument is given).
// With fewer than two arguments, prints a usage line and exits with -1.
inline void getFilenames(const int argc, char** argv,
                         char *& dictionaryfilename, char *& infilename, char *& outfilename)
{
    const bool haveRequiredArgs = (argc >= 3);
    if (!haveRequiredArgs)
    {
        printf("Usage: %s <dictionaryfile> <textfile> [<outputfile>]\n", argv[0]);
        exit(-1);
    }

    dictionaryfilename = argv[1];
    infilename = argv[2];
    // The output file is optional.
    outfilename = (argc >= 4) ? argv[3] : NULL;
}
96
// Opens the dictionary and text files for binary reading and the output file
// for binary writing, storing the streams in the three out-parameters.
// When outfilename is NULL, outfile is set to stdout instead.
// If any fopen fails, an error naming the file is written to stderr and the
// process exits with -1.
inline void openInputOutputFiles(const char * dictionaryfilename, const char * infilename, const char * outfilename,
                                 FILE *& dictionaryfile, FILE *& infile, FILE *& outfile)
{
    // fopen's own result is the only check needed; no stat() beforehand.
    dictionaryfile = fopen(dictionaryfilename, "rb");
    if (!dictionaryfile) {
        fprintf(stderr, "Error: cannot open %s for input.\n", dictionaryfilename);
        exit(-1);
    }

    infile = fopen(infilename, "rb");
    if (!infile) {
        fprintf(stderr, "Error: cannot open %s for input.\n", infilename);
        exit(-1);
    }

    if (!outfilename) {
        // No output file requested: results go to standard output.
        outfile = stdout;
    }
    else {
        outfile = fopen(outfilename, "wb");
        if (!outfile) {
            fprintf(stderr, "Error: cannot open %s for writing.\n", outfilename);
            exit(-1);
        }
    }
}
125
126inline void populateDictionary(FILE * dictionaryfile, FILE * outfile, vector<int>& gids, int& greatest_GID_in_dictionary)
127{
128    // populate dictionary
129#ifdef USE_LS_SYMBOL_TABLE
130    do_process<false, true>(dictionaryfile, outfile);
131#else
132    do_process<false>(dictionaryfile, outfile);
133#endif
134    greatest_GID_in_dictionary = maximumIntegerInVector(gids);
135    gids.clear();
136}
137
138#endif // WCD_COMMON_FUNCTIONS_H
Note: See TracBrowser for help on using the repository browser.