source: icGREP/icgrep-devel/icgrep/re_compiler.cpp @ 3956

Last change on this file since 3956 was 3956, checked in by daled, 5 years ago

Matchstar for utf-8 character classes works. Needs refactoring.

File size: 5.3 KB
Line 
1/*
2 *  Copyright (c) 2014 International Characters.
3 *  This software is licensed to the public under the Open Software License 3.0.
4 *  icgrep is a trademark of International Characters.
5 */
6
7#include "re_compiler.h"
8
9RE_Compiler::RE_Compiler(){}
10
11LLVM_Gen_RetVal RE_Compiler::compile(bool show_compile_time,
12                                     bool ascii_only,
13                                     std::string basis_pattern,
14                                     std::string gensym_pattern,
15                                     UTF_Encoding encoding,
16                                     std::string input_string)
17{
18
19    ParseResult* parse_result = RE_Parser::parse_re(input_string);
20
21    RE* re_ast = 0;
22    if (ParseSuccess* success = dynamic_cast<ParseSuccess*>(parse_result))
23    {
24        re_ast = success->getRE();
25    }
26    else if (ParseFailure* failure = dynamic_cast<ParseFailure*>(parse_result))
27    {
28        std::cout << failure->getErrorMsg() << std::endl;
29        exit(1);
30    }
31    else
32    {
33        std::cout << "An unexepected parser error has occured!" << std::endl;
34        exit(1);
35    }
36
37    //Print to the terminal the AST that was generated by the parser before adding the UTF encoding:
38    //std::cout << "\nParser:\n" + Printer_RE::PrintRE(re_ast) + "\n" << std::endl;
39
40    //Add the UTF encoding.
41    if (!ascii_only)
42    {
43        if (encoding.getName().compare("UTF-8") == 0)
44        {
45            re_ast = UTF8_Encoder::toUTF8(re_ast);
46        }
47        else
48        {
49            std::cout << "Invalid encoding!" << std::endl;
50            exit(1);
51        }
52    }
53
54    //Print to the terminal the AST that was generated by the utf8 encoder.
55    //std::cout << "\nUTF8-encoder:\n" + Printer_RE::PrintRE(re_ast) + "\n" << std::endl;
56
57    //Optimization passes to simplify the AST.
58    re_ast = RE_Simplifier::simplify(RE_Nullable::removeNullableSuffix(RE_Nullable::removeNullablePrefix(re_ast)));
59
60    //Print to the terminal the AST that was generated by the simplifier.
61    //std::cout << "\nSimplifier:\n" + Printer_RE::PrintRE(re_ast) + "\n" << std::endl;
62
63    //Map all of the unique character classes in order to reduce redundancy.
64    std::map<std::string, RE*> re_map;
65    re_ast = RE_Reducer::reduce(re_ast, re_map);
66
67    //Print to the terminal the AST with the reduced REs.
68    //std::cout << "\nReducer:\n" + Printer_RE::PrintRE(re_ast) + "\n" << std::endl;
69
70    //Build our list of predefined characters.
71    std::string cc_name;
72    std::map<std::string,std::string> name_map;
73    std::list<CC*> predefined_characters;
74
75    CC* cc_lf = new CC(0x0A);
76    cc_name = cc_lf->getName();
77    re_map.insert(make_pair(cc_name, cc_lf));
78    name_map.insert(make_pair("LineFeed", cc_name));
79
80    CC* cc_utf8_single_byte = new CC(0x80, 0xBF);
81    cc_name = cc_utf8_single_byte->getName();
82    re_map.insert(make_pair(cc_name, cc_utf8_single_byte));
83    name_map.insert(make_pair("UTF8-SingleByte", cc_name));
84
85    CC* cc_utf8_prefix2 = new CC(0xC2, 0xDF);
86    cc_name = cc_utf8_prefix2->getName();
87    re_map.insert(make_pair(cc_name, cc_utf8_prefix2));
88    name_map.insert(make_pair("UTF8-Prefix2", cc_name));
89
90    CC* cc_utf8_prefix3 = new CC(0xE0, 0xEF);
91    cc_name = cc_utf8_prefix3->getName();
92    re_map.insert(make_pair(cc_name, cc_utf8_prefix3));
93    name_map.insert(make_pair("UTF8-Prefix3", cc_name));
94
95    CC* cc_utf8_prefix4 = new CC(0xF0, 0xF4);
96    cc_name = cc_utf8_prefix4->getName();
97    re_map.insert(make_pair(cc_name, cc_utf8_prefix4));
98    name_map.insert(make_pair("UTF8-Prefix4", cc_name));
99
100    CC_Compiler cc_compiler(encoding);
101    std::list<PabloS*> cc_stmtsl = cc_compiler.compile(basis_pattern, gensym_pattern, re_map, predefined_characters);
102
103    //Print to the terminal the AST that was generated by the character class compiler.
104    //std::cout << "\n" << "(" << StatementPrinter::Print_CC_PabloStmts(cc_stmtsl) << ")" << "\n" << std::endl;
105
106    Pbix_Compiler pbix_compiler(name_map);
107    CodeGenState re_subexpression_cg_state = pbix_compiler.compile_subexpressions(re_map);
108    //Print to the terminal the AST that was generated for the re subexpressions.
109    //std::cout << "\n" << "Subexpressions: (" << StatementPrinter::PrintStmts(re_subexpression_cg_state) << ")" << std::endl;
110
111    CodeGenState re_cg_state = pbix_compiler.compile(re_ast);
112    //Print to the terminal the AST that was generated by the pararallel bit-stream compiler.
113    //std::cout << "\n" << "(" << StatementPrinter::PrintStmts(re_cg_state) << ")" << "\n" << std::endl;
114
115    //Print a count of the Pablo statements and expressions that are contained in the AST from the pbix compiler.
116    //std::cout << "\nPablo Statement Count: " << Pbix_Counter::Count_PabloStatements(re_cg_state.stmtsl) <<  "\n" << std::endl;
117
118    LLVM_Generator irgen(name_map, basis_pattern, encoding.getBits());
119
120    unsigned long long cycles = 0;
121    double timer = 0;
122    if (show_compile_time)
123    {
124        cycles = get_hrcycles();
125        timer = getElapsedTime();
126    }
127
128    LLVM_Gen_RetVal retVal = irgen.Generate_LLVMIR(re_cg_state, re_subexpression_cg_state, cc_stmtsl);
129    if (show_compile_time)
130    {
131        cycles = get_hrcycles() - cycles;
132        timer = getElapsedTime() - timer;
133        std::cout << "LLVM compile time -  cycles:       " << cycles  << std::endl;
134        std::cout << "LLVM compile time -  milliseconds: " << timer << std::endl;
135    }
136
137    return  retVal;  //irgen.Generate_LLVMIR(re_cg_state, cc_stmtsl);
138}
139
Note: See TracBrowser for help on using the repository browser.