Ignore:
Timestamp:
Sep 29, 2014, 2:11:34 PM (5 years ago)
Author:
nmedfort
Message:

More refactoring of the RE system; moved the original re/RE_Compiler to compiler.cpp and the PBIX_Compiler to the re/RE_Compiler.

File:
1 edited

Legend:

Unmodified
Added
Removed
  • icGREP/icgrep-devel/icgrep/compiler.cpp

    r3850 r4197  
     1/*
     2 *  Copyright (c) 2014 International Characters.
     3 *  This software is licensed to the public under the Open Software License 3.0.
     4 *  icgrep is a trademark of International Characters.
     5 */
     6
    17/*
    28 *  Copyright (c) 2014 International Characters.
     
    612
    713#include "compiler.h"
     14#include "re/re_nullable.h"
     15#include "re/re_simplifier.h"
     16#include "re/re_reducer.h"
     17#include "re/parsefailure.h"
     18#include "re/re_parser.h"
     19#include "re/re_compiler.h"
     20#include "hrtime.h"
     21#include "utf8_encoder.h"
     22#include "cc_compiler.h"
    823
    9 Compiler::Compiler()
     24//FOR TESTING AND ANALYSIS
     25//#include "pbix_counter.h"
     26
     27// #define DEBUG_PRINT_RE_AST
     28// #define DEBUG_PRINT_PBIX_AST
     29
     30#ifdef DEBUG_PRINT_RE_AST
     31#include "printer_re.h"
     32#endif
     33#ifdef DEBUG_PRINT_PBIX_AST
     34#include "printer_pablos.h"
     35#endif
     36
     37using namespace re;
     38
     39namespace icgrep {
     40
     41LLVM_Gen_RetVal compile(bool show_compile_time, bool ascii_only, std::string basis_pattern, std::string gensym_pattern, UTF_Encoding encoding, std::string input_string)
    1042{
     43    CC_Compiler cc_compiler(encoding, basis_pattern, gensym_pattern);
     44
     45    RE * re_ast = nullptr;
     46    try
     47    {
     48        re_ast = RE_Parser::parse_re(input_string);
     49    }
     50    catch (ParseFailure failure)
     51    {
     52        std::cerr << "REGEX PARSING FAILURE: " << failure.what() << std::endl;
     53        exit(1);
     54    }
     55
     56    #ifdef DEBUG_PRINT_RE_AST
     57    //Print to the terminal the AST that was generated by the parser before adding the UTF encoding:
     58    std::cerr << "Parser:" << std::endl << Printer_RE::PrintRE(re_ast) << std::endl;
     59    #endif
     60
     61    //Add the UTF encoding.
     62    if (!ascii_only)
     63    {
     64        if (encoding.getName().compare("UTF-8") == 0)
     65        {
     66            re_ast = UTF8_Encoder::toUTF8(re_ast);
     67        }
     68        else
     69        {
     70            std::cerr << "Invalid encoding!" << std::endl;
     71            exit(1);
     72        }
     73    }
     74
     75    #ifdef DEBUG_PRINT_RE_AST
     76    //Print to the terminal the AST that was generated by the utf8 encoder.
     77    std::cerr << "UTF8-encoder:" << std::endl << Printer_RE::PrintRE(re_ast) << std::endl;
     78    #endif
     79
     80    //Optimization passes to simplify the AST.
     81    re_ast = RE_Nullable::removeNullablePrefix(re_ast);
     82
     83    re_ast = RE_Nullable::removeNullableSuffix(re_ast);
     84
     85    re_ast = RE_Simplifier::simplify(re_ast);
     86
     87    #ifdef DEBUG_PRINT_RE_AST
     88    //Print to the terminal the AST that was generated by the simplifier.
     89    std::cerr << "Simplifier:" << std::endl << Printer_RE::PrintRE(re_ast) << std::endl;
     90    #endif
     91
     92    //Map all of the unique character classes in order to reduce redundancy.
     93    std::map<std::string, RE*> re_map;
     94    re_ast = RE_Reducer::reduce(re_ast, re_map);
     95
     96    #ifdef DEBUG_PRINT_RE_AST
     97    //Print to the terminal the AST with the reduced REs.
     98    std::cerr << "Reducer:" << std::endl << Printer_RE::PrintRE(re_ast) << std::endl;
     99    #endif
     100
     101    //Build our list of predefined characters.
     102    std::string cc_name;
     103    std::map<std::string,std::string> name_map;
     104
     105    CC* cc_lf = makeCC(0x0A);
     106    cc_name = cc_lf->getName();
     107    re_map.insert(make_pair(cc_name, cc_lf));
     108    name_map.insert(make_pair("LineFeed", cc_name));
     109
     110    CC* cc_utf8_single_byte = makeCC(0x00, 0x7F);
     111    cc_name = cc_utf8_single_byte->getName();
     112    re_map.insert(make_pair(cc_name, cc_utf8_single_byte));
     113    name_map.insert(make_pair("UTF8-SingleByte", cc_name));
     114
     115    CC* cc_utf8_prefix2 = makeCC(0xC2, 0xDF);
     116    cc_name = cc_utf8_prefix2->getName();
     117    re_map.insert(make_pair(cc_name, cc_utf8_prefix2));
     118    name_map.insert(make_pair("UTF8-Prefix2", cc_name));
     119
     120    CC* cc_utf8_prefix3 = makeCC(0xE0, 0xEF);
     121    cc_name = cc_utf8_prefix3->getName();
     122    re_map.insert(make_pair(cc_name, cc_utf8_prefix3));
     123    name_map.insert(make_pair("UTF8-Prefix3", cc_name));
     124
     125    CC* cc_utf8_prefix4 = makeCC(0xF0, 0xF4);
     126    cc_name = cc_utf8_prefix4->getName();
     127    re_map.insert(make_pair(cc_name, cc_utf8_prefix4));
     128    name_map.insert(make_pair("UTF8-Prefix4", cc_name));
     129
     130    cc_compiler.compile_from_map(re_map);
     131    std::list<PabloS*> cc_stmtsl = cc_compiler.get_compiled();
     132    #ifdef DEBUG_PRINT_PBIX_AST
     133    //Print to the terminal the AST that was generated by the character class compiler.
     134    std::cerr << "Pablo CC AST:" << std::endl << StatementPrinter::Print_CC_PabloStmts(cc_stmtsl) << std::endl;
     135    #endif
     136
     137    RE_Compiler pbix_compiler(name_map);
     138    CodeGenState re_subexpression_cg_state = pbix_compiler.compile_subexpressions(re_map);
     139    #ifdef DEBUG_PRINT_PBIX_AST
     140    //Print to the terminal the AST that was generated for the re subexpressions.
     141    std::cerr << "Subexpressions:" << StatementPrinter::PrintStmts(re_subexpression_cg_state) << std::endl;
     142    #endif
     143
     144    CodeGenState re_cg_state = pbix_compiler.compile(re_ast);
     145    #ifdef DEBUG_PRINT_PBIX_AST
     146    //Print to the terminal the AST that was generated by the parallel bit-stream compiler.
     147    std::cerr << "Final Pablo AST:" << StatementPrinter::PrintStmts(re_cg_state) << ")" << std::endl;
     148    //Print a count of the Pablo statements and expressions that are contained in the AST from the pbix compiler.
     149    // std::cerr << "Pablo Statement Count: " << Pbix_Counter::Count_PabloStatements(re_cg_state.stmtsl) << std::endl;
     150    #endif
     151
     152    LLVM_Generator irgen(name_map, basis_pattern, encoding.getBits());
     153
     154    unsigned long long cycles = 0;
     155    double timer = 0;
     156    if (show_compile_time)
     157    {
     158        cycles = get_hrcycles();
     159        timer = getElapsedTime();
     160    }
     161
     162    LLVM_Gen_RetVal retVal = irgen.Generate_LLVMIR(re_cg_state, re_subexpression_cg_state, cc_stmtsl);
     163    if (show_compile_time)
     164    {
     165        cycles = get_hrcycles() - cycles;
     166        timer = getElapsedTime() - timer;
     167        std::cout << "LLVM compile time -  cycles:       " << cycles  << std::endl;
     168        std::cout << "LLVM compile time -  milliseconds: " << timer << std::endl;
     169    }
     170
     171    return retVal;
    11172}
    12173
    13 
    14 /*
    15 
    16   TODO: This will become the main driver for the application.  The parser, the cc compiler, the pbix compiler
    17   and the ir generator will all be called from here.
    18 
    19 */
     174}
Note: See TracChangeset for help on using the changeset viewer.