Changeset 4197


Ignore:
Timestamp:
Sep 29, 2014, 2:11:34 PM (5 years ago)
Author:
nmedfort
Message:

More refactoring of the RE system; moved the original re/RE_Compiler to compiler.cpp and the PBIX_Compiler to the re/RE_Compiler.

Location:
icGREP/icgrep-devel/icgrep
Files:
2 deleted
20 edited
2 moved

Legend:

Unmodified
Added
Removed
  • icGREP/icgrep-devel/icgrep/CMakeLists.txt

    r4196 r4197  
    5555add_library(PabloADT pe_advance.cpp  pe_all.cpp  pe_and.cpp pe_call.cpp pe_charclass.cpp  pe_matchstar.cpp pe_scanthru.cpp pe_not.cpp  pe_or.cpp  pe_pabloe.cpp  pe_sel.cpp  pe_var.cpp  pe_xor.cpp ps_assign.cpp  ps_if.cpp  ps_pablos.cpp  ps_while.cpp printer_pablos.cpp)
    5656
    57 add_library(RegExpADT re/re_alt.cpp  re/re_cc.cpp  re/re_end.cpp  re/re_name.cpp re/re_parser.cpp  re/re_re.cpp  re/re_rep.cpp  re/re_seq.cpp re/re_start.cpp re/parsefailure.cpp printer_re.cpp)
     57add_library(RegExpADT re/re_alt.cpp re/re_cc.cpp re/re_end.cpp re/re_name.cpp re/re_parser.cpp re/re_re.cpp re/re_rep.cpp re/re_seq.cpp re/re_start.cpp re/parsefailure.cpp re/re_reducer.cpp re/re_nullable.cpp re/re_simplifier.cpp re/re_compiler.cpp re/symbol_generator.cpp printer_re.cpp)
    5858
    5959include_directories("${PROJECT_SOURCE_DIR}")
     60include_directories("${PROJECT_SOURCE_DIR}/re")
    6061include_directories("${PROJECT_SOURCE_DIR}/include")
    6162include_directories("${PROJECT_SOURCE_DIR}/include/simd-lib")
     
    6768
    6869# add the executable
    69 add_executable(icgrep icgrep.cpp llvm_gen.cpp llvm_gen_helper.cpp utf_encoding.cpp cc_compiler.cpp  cc_compiler_helper.cpp re/re_simplifier.cpp re/re_reducer.cpp re/re_nullable.cpp re/re_compiler.cpp pbix_compiler.cpp  symbol_generator.cpp utf8_encoder.cpp unicode_categories.h unicode_categories-flat.h unicode_categories-simple.h)
     70add_executable(icgrep icgrep.cpp llvm_gen.cpp llvm_gen_helper.cpp utf_encoding.cpp cc_compiler.cpp  cc_compiler_helper.cpp compiler.cpp utf8_encoder.cpp unicode_categories.h unicode_categories-flat.h unicode_categories-simple.h)
    7071
    7172target_link_libraries (icgrep PabloADT RegExpADT ${REQ_LLVM_LIBRARIES})
  • icGREP/icgrep-devel/icgrep/cc_compiler.h

    r4194 r4197  
    4848    UTF_Encoding mEncoding;
    4949
    50 
    5150    void add_predefined(std::string key_value, Expression *mapped_value);
    5251    Expression* add_assignment(std::string value, Expression* expr);
  • icGREP/icgrep-devel/icgrep/cc_compiler_helper.cpp

    r4196 r4197  
    2020#include "pe_var.h"
    2121#include "pe_xor.h"
     22// #include "pe_pabloe.h"
     23// #include "cc_compiler.h"
    2224
    2325CC_Compiler_Helper::CC_Compiler_Helper(){}
     
    2729
    2830     -Maintaining Assembler Instruction Form:
    29        -All boolean algebraic rules involving true/flase applied.
     31       -All boolean algebraic rules involving true/false applied.
    3032
    3133       -Negations restricted:
     
    3436*/
    3537
    36 PabloE* CC_Compiler_Helper::make_not(PabloE* expr)
     38PabloE * CC_Compiler_Helper::make_not(PabloE* expr)
    3739{
    3840    if (All* all = dynamic_cast<All*>(expr)) {
     
    5759}
    5860
    59 PabloE* CC_Compiler_Helper::make_and(PabloE * expr1, PabloE *expr2)
     61PabloE * CC_Compiler_Helper::make_and(PabloE * expr1, PabloE *expr2)
    6062{
    6163    if (All* all = dynamic_cast<All*>(expr1)) {
     
    8183        }
    8284    }
    83     else if (equal_exprs(expr1, expr2)) {
     85    else if (equals(expr1, expr2)) {
    8486        delete expr2;
    8587        return expr1;
     
    98100            return make_not(make_or(e1, e2));
    99101        }
    100         else if (equal_exprs(pe_not_e1->getExpr(), expr2)) {
     102        else if (equals(pe_not_e1->getExpr(), expr2)) {
    101103            delete expr1;
    102104            delete expr2;
     
    105107    }
    106108    else if (Not * pe_not_e2 = dynamic_cast<Not*>(expr2)) {
    107         if (equal_exprs(expr1, pe_not_e2->getExpr())) {
     109        if (equals(expr1, pe_not_e2->getExpr())) {
    108110            delete expr1;
    109111            delete expr2;
     
    114116}
    115117
    116 PabloE* CC_Compiler_Helper::make_or(PabloE * expr1, PabloE * expr2)
     118PabloE * CC_Compiler_Helper::make_or(PabloE * expr1, PabloE * expr2)
    117119{
    118120    if (All * all = dynamic_cast<All*>(expr1)) {
     
    149151        return make_not(make_and(expr2, make_not(expr1)));
    150152    }
    151     else if (equal_exprs(expr1, expr2)) {
     153    else if (equals(expr1, expr2)) {
    152154        delete expr2;
    153155        return expr1;
     
    165167            //These optimizations factor out common components that can occur when sets are formed by union
    166168            //(e.g., union of [a-z] and [A-Z].
    167             if (equal_exprs(expr1a, expr2a))
     169            if (equals(expr1a, expr2a))
    168170            {
    169171                return make_and(expr1a, make_or(expr1b, expr2b));
    170172            }
    171             else if (equal_exprs(expr1b, expr2b))
     173            else if (equals(expr1b, expr2b))
    172174            {
    173175                return make_and(expr1b, make_or(expr1a, expr2a));
    174176            }
    175             else if (equal_exprs(expr1a, expr2b))
     177            else if (equals(expr1a, expr2b))
    176178            {
    177179                return make_and(expr1a, make_or(expr1b, expr2a));
    178180            }
    179             else if (equal_exprs(expr1b, expr2a))
     181            else if (equals(expr1b, expr2a))
    180182            {
    181183                return make_and(expr1b, make_or(expr1a, expr2b));
     
    222224        }
    223225    }
    224     else if (equal_exprs(t_expr, f_expr))
     226    else if (equals(t_expr, f_expr))
    225227    {
    226228        return t_expr;
     
    276278*/
    277279
    278 bool CC_Compiler_Helper::equal_exprs(const PabloE * expr1, const PabloE * expr2)
    279 {
    280     if (const All * all_expr1 = dynamic_cast<const All*>(expr1))
    281     {
    282         if (const All * all_expr2 = dynamic_cast<const All*>(expr2))
    283         {
    284             return all_expr1->getNum() == all_expr2->getNum();
    285         }
    286     }
    287     else if (const Var * var_expr1 = dynamic_cast<const Var*>(expr1))
    288     {
    289         if (const Var * var_expr2 = dynamic_cast<const Var*>(expr2))
    290         {
    291             return (var_expr1->getVar() == var_expr2->getVar());
    292         }
    293     }
    294     else if (const Not* not_expr1 = dynamic_cast<const Not*>(expr1))
    295     {
    296         if (const Not* not_expr2 = dynamic_cast<const Not*>(expr2))
    297         {
    298             return equal_exprs(not_expr1->getExpr(), not_expr2->getExpr());
    299         }
    300     }
    301     else if (const And* and_expr1 = dynamic_cast<const And*>(expr1))
    302     {
    303         if (const And* and_expr2 = dynamic_cast<const And*>(expr2))
    304         {
    305             if (equal_exprs(and_expr1->getExpr1(), and_expr2->getExpr1()))
    306             {
    307                 return equal_exprs(and_expr1->getExpr2(), and_expr2->getExpr2());
    308             }
    309             else if (equal_exprs(and_expr1->getExpr1(), and_expr2->getExpr2()))
    310             {
    311                 return equal_exprs(and_expr1->getExpr2(), and_expr2->getExpr1());
    312             }
    313         }
    314     }
    315     else if (const Or * or_expr1 = dynamic_cast<const Or*>(expr1))
    316     {
    317         if (const Or* or_expr2 = dynamic_cast<const Or*>(expr2))
    318         {
    319             if (equal_exprs(or_expr1->getExpr1(), or_expr2->getExpr1()))
    320             {
    321                 return equal_exprs(or_expr1->getExpr2(), or_expr2->getExpr2());
    322             }
    323             else if (equal_exprs(or_expr1->getExpr1(), or_expr2->getExpr2()))
    324             {
    325                 return equal_exprs(or_expr1->getExpr2(), or_expr2->getExpr1());
    326             }
    327         }
    328     }
    329     else if (const Xor * xor_expr1 = dynamic_cast<const Xor *>(expr1))
    330     {
    331         if (const Xor * xor_expr2 = dynamic_cast<const Xor *>(expr2))
    332         {
    333             if (equal_exprs(xor_expr1->getExpr1(), xor_expr2->getExpr1()))
    334             {
    335                 return equal_exprs(xor_expr1->getExpr2(), xor_expr2->getExpr2());
    336             }
    337             else if (equal_exprs(xor_expr1->getExpr1(), xor_expr2->getExpr2()))
    338             {
    339                 return equal_exprs(xor_expr1->getExpr2(), xor_expr2->getExpr1());
    340             }
    341         }
    342     }
    343     else if (const Sel* sel_expr1 = dynamic_cast<const Sel*>(expr1))
    344     {
    345         if (const Sel* sel_expr2 = dynamic_cast<const Sel*>(expr2))
    346         {
    347             if (equal_exprs(sel_expr1->getIf_expr(), sel_expr2->getIf_expr()))
    348             {
    349                 if (equal_exprs(sel_expr1->getT_expr(), sel_expr2->getT_expr()))
    350                 {
    351                     return equal_exprs(sel_expr1->getF_expr(), sel_expr2->getF_expr());
     280bool CC_Compiler_Helper::equals(const PabloE * expr1, const PabloE * expr2)
     281{
     282    if (const All * all1 = dynamic_cast<const All*>(expr1)) {
     283        if (const All * all2 = dynamic_cast<const All*>(expr2)) {
     284            return all1->getNum() == all2->getNum();
     285        }
     286    }
     287    else if (const Var * var1 = dynamic_cast<const Var*>(expr1)) {
     288        if (const Var * var2 = dynamic_cast<const Var*>(expr2)) {
     289            return (var1->getVar() == var2->getVar());
     290        }
     291    }
     292    else if (const Not* not1 = dynamic_cast<const Not*>(expr1)) {
     293        if (const Not* not2 = dynamic_cast<const Not*>(expr2)) {
     294            return equals(not1->getExpr(), not2->getExpr());
     295        }
     296    }
     297    else if (const And* and1 = dynamic_cast<const And*>(expr1)) {
     298        if (const And* and2 = dynamic_cast<const And*>(expr2)) {
     299            if (equals(and1->getExpr1(), and2->getExpr1())) {
     300                return equals(and1->getExpr2(), and2->getExpr2());
     301            }
     302            else if (equals(and1->getExpr1(), and2->getExpr2())) {
     303                return equals(and1->getExpr2(), and2->getExpr1());
     304            }
     305        }
     306    }
     307    else if (const Or * or1 = dynamic_cast<const Or*>(expr1)) {
     308        if (const Or* or2 = dynamic_cast<const Or*>(expr2)) {
     309            if (equals(or1->getExpr1(), or2->getExpr1())) {
     310                return equals(or1->getExpr2(), or2->getExpr2());
     311            }
     312            else if (equals(or1->getExpr1(), or2->getExpr2())) {
     313                return equals(or1->getExpr2(), or2->getExpr1());
     314            }
     315        }
     316    }
     317    else if (const Xor * xor1 = dynamic_cast<const Xor *>(expr1)) {
     318        if (const Xor * xor2 = dynamic_cast<const Xor *>(expr2)) {
     319            if (equals(xor1->getExpr1(), xor2->getExpr1())) {
     320                return equals(xor1->getExpr2(), xor2->getExpr2());
     321            }
     322            else if (equals(xor1->getExpr1(), xor2->getExpr2())) {
     323                return equals(xor1->getExpr2(), xor2->getExpr1());
     324            }
     325        }
     326    }
     327    else if (const Sel* sel1 = dynamic_cast<const Sel*>(expr1)) {
     328        if (const Sel* sel2 = dynamic_cast<const Sel*>(expr2)) {
     329            if (equals(sel1->getIf_expr(), sel2->getIf_expr())) {
     330                if (equals(sel1->getT_expr(), sel2->getT_expr())) {
     331                    return equals(sel1->getF_expr(), sel2->getF_expr());
    352332                }
    353333            }
  • icGREP/icgrep-devel/icgrep/cc_compiler_helper.h

    r4187 r4197  
    88#define COMPILER_HELPER_H
    99
    10 #include "pbix_compiler.h"
    11 #include "cc_compiler.h"
     10class PabloE;
    1211
    1312class CC_Compiler_Helper
     
    1918    static PabloE* make_sel(PabloE* if_expr, PabloE* t_expr, PabloE* f_expr);
    2019    static PabloE* make_xor(PabloE* expr1, PabloE* expr2);
    21     static bool equal_exprs(const PabloE *expr1, const PabloE *expr2);
     20    static bool equals(const PabloE *expr1, const PabloE *expr2);
    2221private:
    2322    CC_Compiler_Helper();
  • icGREP/icgrep-devel/icgrep/compiler.cpp

    r3850 r4197  
     1/*
     2 *  Copyright (c) 2014 International Characters.
     3 *  This software is licensed to the public under the Open Software License 3.0.
     4 *  icgrep is a trademark of International Characters.
     5 */
     6
    17/*
    28 *  Copyright (c) 2014 International Characters.
     
    612
    713#include "compiler.h"
     14#include "re/re_nullable.h"
     15#include "re/re_simplifier.h"
     16#include "re/re_reducer.h"
     17#include "re/parsefailure.h"
     18#include "re/re_parser.h"
     19#include "re/re_compiler.h"
     20#include "hrtime.h"
     21#include "utf8_encoder.h"
     22#include "cc_compiler.h"
    823
    9 Compiler::Compiler()
     24//FOR TESTING AND ANALYSIS
     25//#include "pbix_counter.h"
     26
     27// #define DEBUG_PRINT_RE_AST
     28// #define DEBUG_PRINT_PBIX_AST
     29
     30#ifdef DEBUG_PRINT_RE_AST
     31#include "printer_re.h"
     32#endif
     33#ifdef DEBUG_PRINT_PBIX_AST
     34#include "printer_pablos.h"
     35#endif
     36
     37using namespace re;
     38
     39namespace icgrep {
     40
     41LLVM_Gen_RetVal compile(bool show_compile_time, bool ascii_only, std::string basis_pattern, std::string gensym_pattern, UTF_Encoding encoding, std::string input_string)
    1042{
     43    CC_Compiler cc_compiler(encoding, basis_pattern, gensym_pattern);
     44
     45    RE * re_ast = nullptr;
     46    try
     47    {
     48        re_ast = RE_Parser::parse_re(input_string);
     49    }
     50    catch (ParseFailure failure)
     51    {
     52        std::cerr << "REGEX PARSING FAILURE: " << failure.what() << std::endl;
     53        exit(1);
     54    }
     55
     56    #ifdef DEBUG_PRINT_RE_AST
     57    //Print to the terminal the AST that was generated by the parser before adding the UTF encoding:
     58    std::cerr << "Parser:" << std::endl << Printer_RE::PrintRE(re_ast) << std::endl;
     59    #endif
     60
     61    //Add the UTF encoding.
     62    if (!ascii_only)
     63    {
     64        if (encoding.getName().compare("UTF-8") == 0)
     65        {
     66            re_ast = UTF8_Encoder::toUTF8(re_ast);
     67        }
     68        else
     69        {
     70            std::cerr << "Invalid encoding!" << std::endl;
     71            exit(1);
     72        }
     73    }
     74
     75    #ifdef DEBUG_PRINT_RE_AST
     76    //Print to the terminal the AST that was generated by the utf8 encoder.
     77    std::cerr << "UTF8-encoder:" << std::endl << Printer_RE::PrintRE(re_ast) << std::endl;
     78    #endif
     79
     80    //Optimization passes to simplify the AST.
     81    re_ast = RE_Nullable::removeNullablePrefix(re_ast);
     82
     83    re_ast = RE_Nullable::removeNullableSuffix(re_ast);
     84
     85    re_ast = RE_Simplifier::simplify(re_ast);
     86
     87    #ifdef DEBUG_PRINT_RE_AST
     88    //Print to the terminal the AST that was generated by the simplifier.
     89    std::cerr << "Simplifier:" << std::endl << Printer_RE::PrintRE(re_ast) << std::endl;
     90    #endif
     91
     92    //Map all of the unique character classes in order to reduce redundancy.
     93    std::map<std::string, RE*> re_map;
     94    re_ast = RE_Reducer::reduce(re_ast, re_map);
     95
     96    #ifdef DEBUG_PRINT_RE_AST
     97    //Print to the terminal the AST with the reduced REs.
     98    std::cerr << "Reducer:" << std::endl << Printer_RE::PrintRE(re_ast) << std::endl;
     99    #endif
     100
     101    //Build our list of predefined characters.
     102    std::string cc_name;
     103    std::map<std::string,std::string> name_map;
     104
     105    CC* cc_lf = makeCC(0x0A);
     106    cc_name = cc_lf->getName();
     107    re_map.insert(make_pair(cc_name, cc_lf));
     108    name_map.insert(make_pair("LineFeed", cc_name));
     109
     110    CC* cc_utf8_single_byte = makeCC(0x00, 0x7F);
     111    cc_name = cc_utf8_single_byte->getName();
     112    re_map.insert(make_pair(cc_name, cc_utf8_single_byte));
     113    name_map.insert(make_pair("UTF8-SingleByte", cc_name));
     114
     115    CC* cc_utf8_prefix2 = makeCC(0xC2, 0xDF);
     116    cc_name = cc_utf8_prefix2->getName();
     117    re_map.insert(make_pair(cc_name, cc_utf8_prefix2));
     118    name_map.insert(make_pair("UTF8-Prefix2", cc_name));
     119
     120    CC* cc_utf8_prefix3 = makeCC(0xE0, 0xEF);
     121    cc_name = cc_utf8_prefix3->getName();
     122    re_map.insert(make_pair(cc_name, cc_utf8_prefix3));
     123    name_map.insert(make_pair("UTF8-Prefix3", cc_name));
     124
     125    CC* cc_utf8_prefix4 = makeCC(0xF0, 0xF4);
     126    cc_name = cc_utf8_prefix4->getName();
     127    re_map.insert(make_pair(cc_name, cc_utf8_prefix4));
     128    name_map.insert(make_pair("UTF8-Prefix4", cc_name));
     129
     130    cc_compiler.compile_from_map(re_map);
     131    std::list<PabloS*> cc_stmtsl = cc_compiler.get_compiled();
     132    #ifdef DEBUG_PRINT_PBIX_AST
     133    //Print to the terminal the AST that was generated by the character class compiler.
     134    std::cerr << "Pablo CC AST:" << std::endl << StatementPrinter::Print_CC_PabloStmts(cc_stmtsl) << std::endl;
     135    #endif
     136
     137    RE_Compiler pbix_compiler(name_map);
     138    CodeGenState re_subexpression_cg_state = pbix_compiler.compile_subexpressions(re_map);
     139    #ifdef DEBUG_PRINT_PBIX_AST
     140    //Print to the terminal the AST that was generated for the re subexpressions.
     141    std::cerr << "Subexpressions:" << StatementPrinter::PrintStmts(re_subexpression_cg_state) << std::endl;
     142    #endif
     143
     144    CodeGenState re_cg_state = pbix_compiler.compile(re_ast);
     145    #ifdef DEBUG_PRINT_PBIX_AST
     146    //Print to the terminal the AST that was generated by the parallel bit-stream compiler.
     147    std::cerr << "Final Pablo AST:" << StatementPrinter::PrintStmts(re_cg_state) << ")" << std::endl;
     148    //Print a count of the Pablo statements and expressions that are contained in the AST from the pbix compiler.
     149    // std::cerr << "Pablo Statement Count: " << Pbix_Counter::Count_PabloStatements(re_cg_state.stmtsl) << std::endl;
     150    #endif
     151
     152    LLVM_Generator irgen(name_map, basis_pattern, encoding.getBits());
     153
     154    unsigned long long cycles = 0;
     155    double timer = 0;
     156    if (show_compile_time)
     157    {
     158        cycles = get_hrcycles();
     159        timer = getElapsedTime();
     160    }
     161
     162    LLVM_Gen_RetVal retVal = irgen.Generate_LLVMIR(re_cg_state, re_subexpression_cg_state, cc_stmtsl);
     163    if (show_compile_time)
     164    {
     165        cycles = get_hrcycles() - cycles;
     166        timer = getElapsedTime() - timer;
     167        std::cout << "LLVM compile time -  cycles:       " << cycles  << std::endl;
     168        std::cout << "LLVM compile time -  milliseconds: " << timer << std::endl;
     169    }
     170
     171    return retVal;
    11172}
    12173
    13 
    14 /*
    15 
    16   TODO: This will become the main driver for the application.  The parser, the cc compiler, the pbix compiler
    17   and the ir generator will all be called from here.
    18 
    19 */
     174}
  • icGREP/icgrep-devel/icgrep/compiler.h

    r3850 r4197  
    88#define COMPILER_H
    99
    10 #include "compiler.h"
    11 #include "re_parser.h"
    12 #include "pbix_compiler.h"
     10#include "utf_encoding.h"
    1311#include "llvm_gen.h"
     12#include <string>
    1413
    15 class Compiler
    16 {
    17 public:
    18     Compiler();
    19 };
     14namespace icgrep {
    2015
    21 #endif // COMPILER_H
     16LLVM_Gen_RetVal compile(bool show_compile_time, bool ascii_only, std::string basis_pattern, std::string gensym_pattern, UTF_Encoding encoding, std::string input_string);
    2217
    23 /*
     18}
    2419
    25   TODO: This will become the main driver for the application.  The parser, the cc compiler, the pbix compiler
    26   and the ir generator will all be called from here.
    27 
    28 */
     20#endif
  • icGREP/icgrep-devel/icgrep/icgrep-devel.files

    r4194 r4197  
    1818compiler.cpp
    1919compiler.h
    20 compiler_helper.cpp
    21 compiler_helper.h
    2220hrtime.h
    2321icgrep.cpp
     
    136134re/re_start.cpp
    137135re/re_start.h
     136re/symbol_generator.cpp
     137re/symbol_generator.h
  • icGREP/icgrep-devel/icgrep/icgrep.cpp

    r4194 r4197  
    66
    77#include "icgrep.h"
    8 
    98#include "utf_encoding.h"
    10 #include "re/re_compiler.h"
     9#include "compiler.h"
    1110
    1211#include <fstream>
     
    2322#include <sys/stat.h>
    2423
    25 #include <simd-lib/bitblock.hpp>
    26 #include <simd-lib/carryQ.hpp>
    27 #include <simd-lib/pabloSupport.hpp>
    28 #include <simd-lib/s2p.hpp>
    29 #include <simd-lib/buffer.hpp>
    30 #include <simd-lib/bitblock_iterator.hpp>
     24#include "include/simd-lib/bitblock.hpp"
     25#include "include/simd-lib/carryQ.hpp"
     26#include "include/simd-lib/pabloSupport.hpp"
     27#include "include/simd-lib/s2p.hpp"
     28#include "include/simd-lib/buffer.hpp"
     29#include "include/simd-lib/bitblock_iterator.hpp"
     30#include "include/simd-lib/transpose.hpp"
    3131
    3232#include "hrtime.h"
     
    6464    BitBlock LF;
    6565};
    66 
    67 #include "include/simd-lib/transpose.hpp"
    6866
    6967using namespace std;
     
    232230    encoding.setMask(0xFF);
    233231
    234     auto * re_compiler = new re::RE_Compiler();
    235232    if (compile_time_option)
    236233    {
     
    238235        timer = getElapsedTime();
    239236    }
    240     LLVM_Gen_RetVal llvm_codegen = re_compiler->compile(compile_time_option,
    241                                                         ascii_only_option,
    242                                                         "basis_bits.bit_",
    243                                                         "temp",
    244                                                         encoding ,
    245                                                         (regex_from_file_option ? fileregex : inregex));
     237    LLVM_Gen_RetVal llvm_codegen = icgrep::compile(compile_time_option,
     238                                                   ascii_only_option,
     239                                                   "basis_bits.bit_",
     240                                                   "temp",
     241                                                   encoding ,
     242                                                   (regex_from_file_option ? fileregex : inregex));
    246243
    247244    if (compile_time_option)
     
    264261    }
    265262
    266     delete re_compiler;
    267263#ifndef USE_MMAP
    268264    fclose(infile);
  • icGREP/icgrep-devel/icgrep/icgrep.h

    r3991 r4197  
    88 */
    99
    10 #include <simd-lib/bitblock.hpp>
    11 #include <simd-lib/carryQ.hpp>
    12 #include <simd-lib/pabloSupport.hpp>
     10#include "include/simd-lib/bitblock.hpp"
     11#include "include/simd-lib/carryQ.hpp"
     12#include "include/simd-lib/pabloSupport.hpp"
    1313
    1414struct Basis_bits {
  • icGREP/icgrep-devel/icgrep/llvm_gen.h

    r4194 r4197  
    3535#include "cc_compiler.h"
    3636
    37 #include "pbix_compiler.h"
     37// #include "pbix_compiler.h"
    3838
    3939#include "llvm_gen_helper.h"
     
    5050#include <algorithm>
    5151
    52 #include "llvm/Support/raw_ostream.h"
     52#include <llvm/Support/raw_ostream.h>
    5353
    5454#ifdef USE_LLVM_3_4
    55 #include "llvm/Analysis/Verifier.h"
    56 #include "llvm/Assembly/PrintModulePass.h"
    57 #include "llvm/Linker.h"
     55#include <llvm/Analysis/Verifier.h>
     56#include <llvm/Assembly/PrintModulePass.h>
     57#include <llvm/Linker.h>
    5858#endif
    5959
    6060#ifdef USE_LLVM_3_5
    61 #include "llvm/IR/Verifier.h"
     61#include <llvm/IR/Verifier.h>
    6262#endif
    6363
    64 #include "llvm/Pass.h"
    65 #include "llvm/PassManager.h"
    66 #include "llvm/ADT/SmallVector.h"
    67 #include "llvm/Analysis/Passes.h"
    68 #include "llvm/IR/BasicBlock.h"
    69 #include "llvm/IR/CallingConv.h"
    70 #include "llvm/IR/Constants.h"
    71 #include "llvm/IR/DataLayout.h"
    72 #include "llvm/IR/DerivedTypes.h"
    73 #include "llvm/IR/Function.h"
    74 #include "llvm/IR/GlobalVariable.h"
    75 #include "llvm/IR/InlineAsm.h"
    76 #include "llvm/IR/Instructions.h"
    77 #include "llvm/IR/LLVMContext.h"
    78 #include "llvm/IR/Module.h"
    79 #include "llvm/Support/FormattedStream.h"
    80 #include "llvm/Support/MathExtras.h"
    81 #include "llvm/Support/Casting.h"
    82 #include "llvm/Support/Debug.h"
     64#include <llvm/Pass.h>
     65#include <llvm/PassManager.h>
     66#include <llvm/ADT/SmallVector.h>
     67#include <llvm/Analysis/Passes.h>
     68#include <llvm/IR/BasicBlock.h>
     69#include <llvm/IR/CallingConv.h>
     70#include <llvm/IR/Constants.h>
     71#include <llvm/IR/DataLayout.h>
     72#include <llvm/IR/DerivedTypes.h>
     73#include <llvm/IR/Function.h>
     74#include <llvm/IR/GlobalVariable.h>
     75#include <llvm/IR/InlineAsm.h>
     76#include <llvm/IR/Instructions.h>
     77#include <llvm/IR/LLVMContext.h>
     78#include <llvm/IR/Module.h>
     79#include <llvm/Support/FormattedStream.h>
     80#include <llvm/Support/MathExtras.h>
     81#include <llvm/Support/Casting.h>
     82#include <llvm/Support/Debug.h>
    8383
    84 #include "llvm/Support/TargetSelect.h"
    85 #include "llvm/Transforms/Scalar.h"
     84#include <llvm/Support/TargetSelect.h>
     85#include <llvm/Transforms/Scalar.h>
    8686
    87 #include "llvm/ExecutionEngine/ExecutionEngine.h"
    88 #include "llvm/ExecutionEngine/MCJIT.h"
     87#include <llvm/ExecutionEngine/ExecutionEngine.h>
     88#include <llvm/ExecutionEngine/MCJIT.h>
    8989
    90 #include "llvm/IRReader/IRReader.h"
    91 #include "llvm/Bitcode/ReaderWriter.h"
    92 #include "llvm/Support/MemoryBuffer.h"
     90#include <llvm/IRReader/IRReader.h>
     91#include <llvm/Bitcode/ReaderWriter.h>
     92#include <llvm/Support/MemoryBuffer.h>
    9393
    94 #include "llvm/IR/IRBuilder.h"
     94#include <llvm/IR/IRBuilder.h>
    9595
    9696#include "include/simd-lib/bitblock.hpp"
  • icGREP/icgrep-devel/icgrep/pbix_compiler.cpp

    r4195 r4197  
    66
    77#include "pbix_compiler.h"
    8 //Regular Expressions
    9 #include "re/re_name.h"
    10 #include "re/re_start.h"
    11 #include "re/re_end.h"
    12 #include "re/re_seq.h"
    13 #include "re/re_alt.h"
    14 #include "re/re_rep.h"
    15 
    16 //Pablo Expressions
    17 #include "pe_pabloe.h"
    18 #include "pe_sel.h"
    19 #include "pe_advance.h"
    20 #include "pe_all.h"
    21 #include "pe_and.h"
    22 #include "pe_charclass.h"
    23 #include "pe_call.h"
    24 #include "pe_matchstar.h"
    25 #include "pe_scanthru.h"
    26 #include "pe_not.h"
    27 #include "pe_or.h"
    28 #include "pe_var.h"
    29 #include "pe_xor.h"
    30 
    31 //Pablo Statements
    32 #include "ps_pablos.h"
    33 #include "ps_assign.h"
    34 #include "ps_if.h"
    35 #include "ps_while.h"
    36 
    37 #include <assert.h>
    38 #include <stdexcept>
    39 
    40 using namespace re;
    41 
    42 Pbix_Compiler::Pbix_Compiler(std::map<std::string, std::string> name_map)
    43 {
    44     m_name_map = name_map;
    45     symgen = SymbolGenerator();
    46 }
    47 
    48 CodeGenState Pbix_Compiler::compile_subexpressions(const std::map<std::string, RE*>& re_map)
    49 {
    50     CodeGenState cg_state;
    51     for (auto i =  re_map.rbegin(); i != re_map.rend(); ++i) {
    52         //This is specifically for the utf8 multibyte character classes.
    53         if (Seq * seq = dyn_cast<Seq>(i->second)) {
    54             if (seq->getType() == Seq::Type::Byte) {
    55                 std::string gs_retVal = symgen.gensym("start_marker");
    56                 cg_state.stmtsl.push_back(new Assign(gs_retVal, new All(1)));               
    57                 for (auto j = seq->begin();; ) {
    58                     Name * name = dyn_cast<Name>(*j);
    59                     assert (name);
    60                     And * cc_mask = new And(new Var(gs_retVal), new CharClass(name->getName()));
    61                     if (++j != seq->end()) {
    62                         gs_retVal = symgen.gensym("marker");
    63                         cg_state.stmtsl.push_back(new Assign(gs_retVal, new Advance(cc_mask)));
    64                     }
    65                     else {
    66                         cg_state.stmtsl.push_back(new Assign(seq->getName(), cc_mask));
    67                         break;
    68                     }
    69                 }
    70                 cg_state.newsym = gs_retVal;
    71             }
    72         }
    73     }
    74     return cg_state;
    75 }
    76 
    77 CodeGenState Pbix_Compiler::compile(RE *re)
    78 {   
    79     CodeGenState cg_state;
    80 
    81     std::string gs_m0 = symgen.gensym("start_marker");
    82     cg_state.stmtsl.push_back(new Assign(gs_m0, new All(1)));
    83 
    84     if (hasUnicode(re))
    85     {
    86         cg_state.newsym = gs_m0;
    87         //Set the 'internal.initial' bit stream for the utf-8 multi-byte encoding.
    88         std::string gs_initial = symgen.gensym("internal.initial");
    89         m_name_map.insert(make_pair("internal.initial", gs_initial));
    90         PabloE * u8single = new Var(m_name_map.find("UTF8-SingleByte")->second);
    91         PabloE * u8pfx2 = new Var(m_name_map.find("UTF8-Prefix2")->second);
    92         PabloE * u8pfx3 = new Var(m_name_map.find("UTF8-Prefix3")->second);
    93         PabloE * u8pfx4 = new Var(m_name_map.find("UTF8-Prefix4")->second);
    94         PabloE * u8pfx = new Or(new Or(u8pfx2, u8pfx3), u8pfx4);
    95         cg_state.stmtsl.push_back(new Assign(gs_initial, new Or(u8pfx, u8single)));
    96         cg_state.newsym = gs_initial;
    97 
    98         //Set the 'internal.nonfinal' bit stream for the utf-8 multi-byte encoding.
    99         cg_state.newsym = gs_m0;
    100         std::string gs_nonfinal = symgen.gensym("internal.nonfinal");
    101         m_name_map.insert(make_pair("internal.nonfinal", gs_nonfinal));
    102         //#define USE_IF_FOR_NONFINAL
    103         #ifdef USE_IF_FOR_NONFINAL
    104         cg_state.stmtsl.push_back(new Assign(gs_nonfinal, new All(0)));
    105         #endif
    106         PabloE * u8scope32 = new Advance(u8pfx3);
    107         PabloE * u8scope42 = new Advance(u8pfx4);
    108         PabloE * u8scope43 = new Advance(u8scope42);
    109         PabloS * assign_non_final = new Assign(gs_nonfinal, new Or(new Or(u8pfx, u8scope32), new Or(u8scope42, u8scope43)));
    110         #ifdef USE_IF_FOR_NONFINAL
    111         std::list<PabloS *> * if_body = new std::list<PabloS *> ();
    112         if_body->push_back(assign_non_final);
    113         cg_state.stmtsl.push_back(new If(u8pfx, *if_body));
    114         #else
    115         cg_state.stmtsl.push_back(assign_non_final);
    116         #endif
    117         cg_state.newsym = gs_nonfinal;
    118     }
    119 
    120     cg_state.newsym = gs_m0;
    121     cg_state = re2pablo_helper(re, cg_state);
    122 
    123     //These three lines are specifically for grep.
    124     std::string gs_retVal = symgen.gensym("marker");
    125     cg_state.stmtsl.push_back(new Assign(gs_retVal, new And(new MatchStar(new Var(cg_state.newsym),
    126         new Not(new Var(m_name_map.find("LineFeed")->second))), new Var(m_name_map.find("LineFeed")->second))));
    127     cg_state.newsym = gs_retVal;
    128 
    129     return cg_state;
    130 }
    131 
    132 CodeGenState Pbix_Compiler::re2pablo_helper(RE *re, CodeGenState cg_state)
    133 {
    134     if (Name* name = dyn_cast<Name>(re))
    135     {
    136         std::string gs_retVal = symgen.gensym("marker");
    137         PabloE* markerExpr = new Var(cg_state.newsym);
    138         if (name->getType() != Name::Type::FixedLength) {
    139             // Move the markers forward through any nonfinal UTF-8 bytes to the final position of each character.
    140             markerExpr = new And(markerExpr, new CharClass(m_name_map.find("internal.initial")->second));
    141             markerExpr = new ScanThru(markerExpr, new CharClass(m_name_map.find("internal.nonfinal")->second));
    142         }       
    143         PabloE* ccExpr;
    144         if (name->getType() == Name::Type::UnicodeCategory)
    145         {
    146             ccExpr = new Call(name->getName());
    147         }
    148         else
    149         {
    150             ccExpr = new CharClass(name->getName());
    151         }
    152         if (name->isNegated()) {
    153             ccExpr = new Not(new Or(new Or(ccExpr, new CharClass(m_name_map.find("LineFeed")->second)),
    154                                     new CharClass(m_name_map.find("internal.nonfinal")->second)));
    155         }
    156         cg_state.stmtsl.push_back(new Assign(gs_retVal, new Advance(new And(ccExpr, markerExpr))));
    157         cg_state.newsym = gs_retVal;
    158     }
    159     else if (isa<Start>(re))
    160     {
    161         std::string gs_retVal = symgen.gensym("start_of_line_marker");
    162         cg_state.stmtsl.push_back(new Assign(gs_retVal, new And(new Var(cg_state.newsym), new Not(new Advance(new Not(new CharClass(m_name_map.find("LineFeed")->second)))))));
    163         cg_state.newsym = gs_retVal;
    164     }
    165     else if (isa<End>(re))
    166     {
    167         std::string gs_retVal = symgen.gensym("end_of_line_marker");
    168         cg_state.stmtsl.push_back(new Assign(gs_retVal, new And(new Var(cg_state.newsym), new CharClass(m_name_map.find("LineFeed")->second))));
    169         cg_state.newsym = gs_retVal;
    170     }
    171     else if (Seq* seq = dyn_cast<Seq>(re))
    172     {
    173         if (!seq->empty())
    174         {
    175             cg_state = Seq_helper(seq, seq->begin(), cg_state);
    176         }
    177     }
    178     else if (Alt* alt = dyn_cast<Alt>(re))
    179     {
    180         if (alt->empty())
    181         {
    182             std::string gs_retVal = symgen.gensym("always_fail_marker");
    183             cg_state.stmtsl.push_back(new Assign(gs_retVal, new All(0)));
    184             cg_state.newsym = gs_retVal;
    185         }
    186         else
    187         {
    188             if (alt->size() == 1)
    189             {
    190                 cg_state = re2pablo_helper(alt->back(), cg_state);
    191             }
    192             else
    193             {
    194                 cg_state = Alt_helper(alt, alt->begin(), cg_state);
    195             }
    196         }
    197 
    198     }
    199     else if (Rep* rep = dyn_cast<Rep>(re))
    200     {
    201         if (isa<Name>(rep->getRE()) && (rep->getLB() == 0) && (rep->getUB()== Rep::UNBOUNDED_REP))
    202         {
    203             Name* rep_name = dyn_cast<Name>(rep->getRE());
    204             std::string gs_retVal = symgen.gensym("marker");
    205 
    206             PabloE* ccExpr;
    207             if (rep_name->getType() == Name::Type::UnicodeCategory)
    208             {
    209                 ccExpr = new Call(rep_name->getName());
    210             }
    211             else
    212             {
    213                 ccExpr = new CharClass(rep_name->getName());
    214             }
    215 
    216             if (rep_name->isNegated()) {
    217                 ccExpr = new Not(new Or(new Or(ccExpr, new CharClass(m_name_map.find("LineFeed")->second)),
    218                                         new CharClass(m_name_map.find("internal.nonfinal")->second)));
    219             }
    220             if (rep_name->getType() == Name::Type::FixedLength)
    221             {
    222                 cg_state.stmtsl.push_back(new Assign(gs_retVal, new MatchStar(new Var(cg_state.newsym), ccExpr)));
    223             }
    224             else //Name::Unicode and Name::UnicodeCategory
    225             {
    226                 cg_state.stmtsl.push_back(new Assign(gs_retVal,
    227                     new And(new MatchStar(new Var(cg_state.newsym), new Or(new CharClass(m_name_map.find("internal.nonfinal")->second),
    228                     ccExpr)), new CharClass(m_name_map.find("internal.initial")->second))));
    229             }
    230 
    231             cg_state.newsym = gs_retVal;
    232         }
    233         else if (rep->getUB() == Rep::UNBOUNDED_REP)
    234         {
    235             cg_state = UnboundedRep_helper(rep->getRE(), rep->getLB(), cg_state);
    236         }
    237         else if (rep->getUB() != Rep::UNBOUNDED_REP)
    238         {
    239             cg_state = BoundedRep_helper(rep->getRE(), rep->getLB(), rep->getUB(), cg_state);
    240         }
    241     }
    242 
    243     return cg_state;
    244 }
    245 
    246 
    247 CodeGenState Pbix_Compiler::Seq_helper(Vector *lst, const_iterator it, CodeGenState cg_state)
    248 {
    249     if (it != lst->end())
    250     {
    251         cg_state = re2pablo_helper(*it, cg_state);
    252         cg_state = Seq_helper(lst, ++it, cg_state);
    253     }
    254 
    255     return cg_state;
    256 }
    257 
    258 CodeGenState Pbix_Compiler::Alt_helper(Vector* lst, const_iterator it, CodeGenState cg_state)
    259 {
    260     CodeGenState t1_cg_state = re2pablo_helper(*it, cg_state);
    261     cg_state.stmtsl = t1_cg_state.stmtsl;
    262     ++it;
    263     if (it != lst->end())
    264     {
    265         CodeGenState t2_cg_state = Alt_helper(lst, it, cg_state);
    266         cg_state.stmtsl = t2_cg_state.stmtsl;
    267         std::string gs_retVal = symgen.gensym("alt_marker");
    268         cg_state.stmtsl.push_back(new Assign(gs_retVal, new Or(new Var(t1_cg_state.newsym), new Var(t2_cg_state.newsym))));
    269         cg_state.newsym = gs_retVal;
    270     }
    271     else
    272     {
    273         cg_state.newsym = t1_cg_state.newsym;
    274     }
    275 
    276     return cg_state;
    277 }
    278 
    279 CodeGenState Pbix_Compiler::UnboundedRep_helper(RE* repeated, int lb, CodeGenState cg_state) {
    280     if (lb == 0)
    281     {
    282          std::string while_test_gs_retVal = symgen.gensym("while_test");
    283          std::string while_accum_gs_retVal = symgen.gensym("while_accum");
    284          CodeGenState while_test_state;
    285          while_test_state.newsym = while_test_gs_retVal;
    286          CodeGenState t1_cg_state = re2pablo_helper(repeated, while_test_state);
    287          cg_state.stmtsl.push_back(new Assign(while_test_gs_retVal, new Var(cg_state.newsym)));
    288          cg_state.stmtsl.push_back(new Assign(while_accum_gs_retVal, new Var(cg_state.newsym)));
    289          std::list<PabloS*> stmtList;
    290          stmtList = t1_cg_state.stmtsl;
    291          stmtList.push_back(new Assign(while_test_gs_retVal, new And(new Var(t1_cg_state.newsym), new Not(new Var(while_accum_gs_retVal)))));
    292          stmtList.push_back(new Assign(while_accum_gs_retVal, new Or(new Var(while_accum_gs_retVal), new Var(t1_cg_state.newsym))));
    293          cg_state.stmtsl.push_back( new While(new Var(while_test_gs_retVal), stmtList));
    294          cg_state.newsym = while_accum_gs_retVal;
    295     }
    296     else //if (lb > 0)
    297     {
    298          CodeGenState t1_cg_state = re2pablo_helper(repeated, cg_state);
    299          cg_state = UnboundedRep_helper(repeated, lb -1, t1_cg_state);
    300     }
    301     return cg_state;
    302 }
    303 
    304 
    305 CodeGenState Pbix_Compiler::BoundedRep_helper(RE* repeated, int lb, int ub, CodeGenState cg_state) {
    306     if ((lb == 0) && (ub == 0))
    307     {
    308     //Just fall through...do nothing.
    309     }
    310     else if ((lb == 0) && (ub > 0))
    311     {
    312          CodeGenState t1_cg_state = re2pablo_helper(repeated, cg_state);
    313          CodeGenState t2_cg_state = BoundedRep_helper(repeated, 0, ub-1, t1_cg_state);
    314          std::string gs_retVal = symgen.gensym("alt_marker");
    315          cg_state.stmtsl = t2_cg_state.stmtsl;
    316          cg_state.stmtsl.push_back(new Assign(gs_retVal, new Or(new Var(cg_state.newsym), new Var(t2_cg_state.newsym))));
    317          cg_state.newsym = gs_retVal;
    318     }
    319     else //if ((lb > 0) && (ub > 0))
    320     {
    321          CodeGenState t1_cg_state = re2pablo_helper(repeated, cg_state);
    322          cg_state = BoundedRep_helper(repeated, lb-1, ub-1, t1_cg_state);
    323     }
    324     return cg_state;
    325 }
    326 
    327 
    328 bool Pbix_Compiler::hasUnicode(const RE * re) {
    329     bool found = false;
    330     if (re == nullptr) {
    331         throw std::runtime_error("Unexpected Null Value passed to RE Compiler!");
    332     }
    333     else if (const Name * name = dyn_cast<const Name>(re)) {
    334         if ((name->getType() == Name::Type::UnicodeCategory) || (name->getType() == Name::Type::Unicode)) {
    335             found = true;
    336         }
    337     }
    338     else if (const Seq * re_seq = dyn_cast<const Seq>(re)) {
    339         for (auto i = re_seq->cbegin(); i != re_seq->cend(); ++i) {
    340             if (hasUnicode(*i)) {
    341                 found = true;
    342                 break;
    343             }
    344         }
    345     }
    346     else if (const Alt * re_alt = dyn_cast<const Alt>(re)) {
    347         for (auto i = re_alt->cbegin(); i != re_alt->cend(); ++i) {
    348             if (hasUnicode(*i)) {
    349                 found = true;
    350                 break;
    351             }
    352         }
    353     }
    354     else if (const Rep * rep = dyn_cast<const Rep>(re)) {
    355         found = hasUnicode(rep->getRE());
    356     }
    357     return found;
    358 }
  • icGREP/icgrep-devel/icgrep/pbix_compiler.h

    r4194 r4197  
    55 */
    66
    7 #ifndef COMPILER_H
    8 #define COMPILER_H
    9 
    10 //Regular Expressions
    11 #include "re/re_re.h"
    12 //Pablo Statements
    13 #include "ps_pablos.h"
    14 //Code Generation
    15 #include "symbol_generator.h"
    16 
    17 #include <string>
    18 #include <list>
    19 #include <map>
    20 
    21 
    22 struct CodeGenState{
    23     std::list<PabloS*> stmtsl;
    24     std::string newsym;
    25 };
    26 
    27 class Pbix_Compiler
    28 {
    29     typedef re::RE                  RE;
    30     typedef re::Vector              Vector;
    31     typedef Vector::const_iterator  const_iterator;
    32 
    33 public:
    34     Pbix_Compiler(std::map<std::string, std::string> name_map);
    35     CodeGenState compile(RE *re);
    36     CodeGenState compile_subexpressions(const std::map<std::string, RE*>& re_map);
    37 private:
    38     CodeGenState re2pablo_helper(RE *re, CodeGenState cg_state);
    39     CodeGenState Seq_helper(Vector * lst, const_iterator it, CodeGenState cg_state);
    40     CodeGenState Alt_helper(Vector * lst, const_iterator it, CodeGenState cg_state);
    41     CodeGenState UnboundedRep_helper(RE* repeated, int lb, CodeGenState cg_state);
    42     CodeGenState BoundedRep_helper(RE* repeated, int lb, int ub, CodeGenState cg_state);
    43 
    44     static bool hasUnicode(const RE *re);
    45 
    46     SymbolGenerator symgen;
    47     std::map<std::string, std::string> m_name_map;
    48 };
    49 
    50 #endif // COMPILER_H
  • icGREP/icgrep-devel/icgrep/printer_pablos.cpp

    r4194 r4197  
    6969std::string StatementPrinter::Print_CC_PabloStmts(std::list<PabloS*> stmts)
    7070{
    71     std::string strOut;
    72 
    73     std::cout << "Total Statements: " + std::to_string(stmts.size()) << std::endl;
    74     std::list<PabloS*>::iterator it;
    75     for (it = stmts.begin(); it != stmts.end(); ++it)
    76     {
    77         strOut += ShowPabloS(*it) + "\n";
     71    std::string strOut = "Total Statements: " + std::to_string(stmts.size()) + "\n";
     72    for (const auto stmt : stmts) {
     73        strOut += ShowPabloS(stmt) + "\n";
    7874    }
    79 
    8075    return strOut;
    8176}
  • icGREP/icgrep-devel/icgrep/printer_pablos.h

    r4194 r4197  
    88#define SHOW_H
    99
    10 #include "pbix_compiler.h"
    11 
     10#include "ps_pablos.h"
    1211#include <string>
    1312#include <list>
    14 
    1513
    1614class StatementPrinter
  • icGREP/icgrep-devel/icgrep/ps_pablos.cpp

    r3850 r4197  
    77#include "ps_pablos.h"
    88
    9 PabloS::PabloS(){};
    10 PabloS::~PabloS(){};
     9PabloS::PabloS(){}
     10PabloS::~PabloS(){}
    1111
  • icGREP/icgrep-devel/icgrep/ps_pablos.h

    r3850 r4197  
    88#define PS_PABLOS_H
    99
     10#include <list>
     11#include <string>
    1012#include "pe_pabloe.h"
    1113
    12 class PabloS
    13 {
     14class PabloS {
    1415public:
    1516    virtual ~PabloS();
     
    1819};
    1920
     21struct CodeGenState{
     22    std::list<PabloS*> stmtsl;
     23    std::string newsym;
     24};
     25
    2026#endif // PS_PABLOS_H
  • icGREP/icgrep-devel/icgrep/re/re_compiler.cpp

    r4194 r4197  
    66
    77#include "re_compiler.h"
     8//Regular Expressions
     9#include "re_name.h"
     10#include "re_start.h"
     11#include "re_end.h"
     12#include "re_seq.h"
    813#include "re_alt.h"
    9 #include "re_cc.h"
    10 #include "re_name.h"
    11 #include "re_end.h"
    1214#include "re_rep.h"
    13 #include "re_seq.h"
    14 #include "re_start.h"
    15 #include "re_nullable.h"
    16 #include "re_simplifier.h"
    17 #include "re_reducer.h"
    18 #include "parsefailure.h"
    19 #include "re_parser.h"
    20 
    21 #include "../hrtime.h"
    22 
    23 #include "../printer_pablos.h"
    24 #include "../printer_re.h"
    25 
    26 #include "../utf8_encoder.h"
    27 #include "../cc_compiler.h"
    28 #include "../pbix_compiler.h"
    29 #include "../symbol_generator.h"
    30 
    31 //FOR TESTING AND ANALYSIS
    32 //#include "pbix_counter.h"
    33 
    34 // #define DEBUG_PRINT_RE_AST
    35 // #define DEBUG_PRINT_PBIX_AST
     15
     16//Pablo Expressions
     17#include "../pe_pabloe.h"
     18#include "../pe_sel.h"
     19#include "../pe_advance.h"
     20#include "../pe_all.h"
     21#include "../pe_and.h"
     22#include "../pe_charclass.h"
     23#include "../pe_call.h"
     24#include "../pe_matchstar.h"
     25#include "../pe_scanthru.h"
     26#include "../pe_not.h"
     27#include "../pe_or.h"
     28#include "../pe_var.h"
     29#include "../pe_xor.h"
     30
     31//Pablo Statements
     32#include "../ps_pablos.h"
     33#include "../ps_assign.h"
     34#include "../ps_if.h"
     35#include "../ps_while.h"
     36
     37#include <assert.h>
     38#include <stdexcept>
    3639
    3740namespace re {
    3841
    39 RE_Compiler::RE_Compiler(){}
    40 
    41 LLVM_Gen_RetVal RE_Compiler::compile(bool show_compile_time,
    42                                      bool ascii_only,
    43                                      std::string basis_pattern,
    44                                      std::string gensym_pattern,
    45                                      UTF_Encoding encoding,
    46                                      std::string input_string)
     42RE_Compiler::RE_Compiler(std::map<std::string, std::string> name_map)
     43: m_name_map(name_map)
     44, symgen()
    4745{
    48     CC_Compiler cc_compiler(encoding, basis_pattern, gensym_pattern);
    49 
    50 //    std::cerr << "============================================================================" << std::endl;
    51 //    std::cerr << input_string << std::endl;
    52 //    std::cerr << "============================================================================" << std::endl;
    53 
    54     RE* re_ast = nullptr;
    55     try
    56     {
    57         re_ast = RE_Parser::parse_re(input_string);
    58     }
    59     catch (ParseFailure failure)
    60     {
    61         std::cerr << "REGEX PARSING FAILURE: " << failure.what() << std::endl;
    62         exit(1);
    63     }
    64 
    65     #ifdef DEBUG_PRINT_RE_AST
    66     //Print to the terminal the AST that was generated by the parser before adding the UTF encoding:
    67     std::cerr << "Parser:" << std::endl << Printer_RE::PrintRE(re_ast) << std::endl;
    68     #endif
    69 
    70     //Add the UTF encoding.
    71     if (!ascii_only)
    72     {
    73         if (encoding.getName().compare("UTF-8") == 0)
    74         {
    75             re_ast = UTF8_Encoder::toUTF8(re_ast);
    76         }
    77         else
    78         {
    79             std::cerr << "Invalid encoding!" << std::endl;
    80             exit(1);
    81         }
    82     }
    83 
    84     #ifdef DEBUG_PRINT_RE_AST
    85     //Print to the terminal the AST that was generated by the utf8 encoder.
    86     std::cerr << "UTF8-encoder:" << std::endl << Printer_RE::PrintRE(re_ast) << std::endl;
    87     #endif
    88 
    89     //Optimization passes to simplify the AST.
    90     re_ast = RE_Nullable::removeNullablePrefix(re_ast);
    91 
    92     re_ast = RE_Nullable::removeNullableSuffix(re_ast);
    93 
    94     re_ast = RE_Simplifier::simplify(re_ast);
    95 
    96     #ifdef DEBUG_PRINT_RE_AST
    97     //Print to the terminal the AST that was generated by the simplifier.
    98     std::cerr << "Simplifier:" << std::endl << Printer_RE::PrintRE(re_ast) << std::endl;
    99     #endif
    100 
    101     //Map all of the unique character classes in order to reduce redundancy.
    102     std::map<std::string, RE*> re_map;
    103     re_ast = RE_Reducer::reduce(re_ast, re_map);
    104 
    105     #ifdef DEBUG_PRINT_RE_AST
    106     //Print to the terminal the AST with the reduced REs.
    107     std::cerr << "Reducer:" << std::endl << Printer_RE::PrintRE(re_ast) << std::endl;
    108     #endif
    109 
    110     //Build our list of predefined characters.
    111     std::string cc_name;
    112     std::map<std::string,std::string> name_map;
    113 
    114     CC* cc_lf = makeCC(0x0A);
    115     cc_name = cc_lf->getName();
    116     re_map.insert(make_pair(cc_name, cc_lf));
    117     name_map.insert(make_pair("LineFeed", cc_name));
    118 
    119     CC* cc_utf8_single_byte = makeCC(0x00, 0x7F);
    120     cc_name = cc_utf8_single_byte->getName();
    121     re_map.insert(make_pair(cc_name, cc_utf8_single_byte));
    122     name_map.insert(make_pair("UTF8-SingleByte", cc_name));
    123 
    124     CC* cc_utf8_prefix2 = makeCC(0xC2, 0xDF);
    125     cc_name = cc_utf8_prefix2->getName();
    126     re_map.insert(make_pair(cc_name, cc_utf8_prefix2));
    127     name_map.insert(make_pair("UTF8-Prefix2", cc_name));
    128 
    129     CC* cc_utf8_prefix3 = makeCC(0xE0, 0xEF);
    130     cc_name = cc_utf8_prefix3->getName();
    131     re_map.insert(make_pair(cc_name, cc_utf8_prefix3));
    132     name_map.insert(make_pair("UTF8-Prefix3", cc_name));
    133 
    134     CC* cc_utf8_prefix4 = makeCC(0xF0, 0xF4);
    135     cc_name = cc_utf8_prefix4->getName();
    136     re_map.insert(make_pair(cc_name, cc_utf8_prefix4));
    137     name_map.insert(make_pair("UTF8-Prefix4", cc_name));
    138 
    139     cc_compiler.compile_from_map(re_map);
    140     std::list<PabloS*> cc_stmtsl = cc_compiler.get_compiled();
    141     #ifdef DEBUG_PRINT_PBIX_AST
    142     //Print to the terminal the AST that was generated by the character class compiler.
    143     std::cerr << "Pablo CC AST:" << std::endl << StatementPrinter::Print_CC_PabloStmts(cc_stmtsl) << std::endl;
    144     #endif
    145 
    146     Pbix_Compiler pbix_compiler(name_map);
    147     CodeGenState re_subexpression_cg_state = pbix_compiler.compile_subexpressions(re_map);
    148     #ifdef DEBUG_PRINT_PBIX_AST
    149     //Print to the terminal the AST that was generated for the re subexpressions.
    150     std::cerr << "Subexpressions:" << StatementPrinter::PrintStmts(re_subexpression_cg_state) << std::endl;
    151     #endif
    152 
    153     CodeGenState re_cg_state = pbix_compiler.compile(re_ast);
    154     #ifdef DEBUG_PRINT_PBIX_AST
    155     //Print to the terminal the AST that was generated by the parallel bit-stream compiler.
    156     std::cerr << "Final Pablo AST:" << StatementPrinter::PrintStmts(re_cg_state) << ")" << std::endl;
    157     //Print a count of the Pablo statements and expressions that are contained in the AST from the pbix compiler.
    158     // std::cerr << "Pablo Statement Count: " << Pbix_Counter::Count_PabloStatements(re_cg_state.stmtsl) << std::endl;
    159     #endif
    160 
    161     LLVM_Generator irgen(name_map, basis_pattern, encoding.getBits());
    162 
    163     unsigned long long cycles = 0;
    164     double timer = 0;
    165     if (show_compile_time)
    166     {
    167         cycles = get_hrcycles();
    168         timer = getElapsedTime();
    169     }
    170 
    171     LLVM_Gen_RetVal retVal = irgen.Generate_LLVMIR(re_cg_state, re_subexpression_cg_state, cc_stmtsl);
    172     if (show_compile_time)
    173     {
    174         cycles = get_hrcycles() - cycles;
    175         timer = getElapsedTime() - timer;
    176         std::cout << "LLVM compile time -  cycles:       " << cycles  << std::endl;
    177         std::cout << "LLVM compile time -  milliseconds: " << timer << std::endl;
    178     }
    179 
    180     return  retVal;  //irgen.Generate_LLVMIR(re_cg_state, cc_stmtsl);
    181 }
    182 
    183 }
     46
     47}
     48
     49CodeGenState RE_Compiler::compile_subexpressions(const std::map<std::string, RE*>& re_map)
     50{
     51    CodeGenState cg_state;
     52    for (auto i =  re_map.rbegin(); i != re_map.rend(); ++i) {
     53        //This is specifically for the utf8 multibyte character classes.
     54        if (Seq * seq = dyn_cast<Seq>(i->second)) {
     55            if (seq->getType() == Seq::Type::Byte) {
     56                std::string gs_retVal = symgen.get("start_marker");
     57                cg_state.stmtsl.push_back(new Assign(gs_retVal, new All(1)));
     58                for (auto j = seq->begin();; ) {
     59                    Name * name = dyn_cast<Name>(*j);
     60                    assert (name);
     61                    And * cc_mask = new And(new Var(gs_retVal), new CharClass(name->getName()));
     62                    if (++j != seq->end()) {
     63                        gs_retVal = symgen.get("marker");
     64                        cg_state.stmtsl.push_back(new Assign(gs_retVal, new Advance(cc_mask)));
     65                    }
     66                    else {
     67                        cg_state.stmtsl.push_back(new Assign(seq->getName(), cc_mask));
     68                        break;
     69                    }
     70                }
     71                cg_state.newsym = gs_retVal;
     72            }
     73        }
     74    }
     75    return cg_state;
     76}
     77
     78CodeGenState RE_Compiler::compile(RE * re)
     79{
     80    CodeGenState cg_state;
     81
     82    std::string gs_m0 = symgen.get("start_marker");
     83    cg_state.stmtsl.push_back(new Assign(gs_m0, new All(1)));
     84
     85    if (hasUnicode(re)) {
     86        cg_state.newsym = gs_m0;
     87        //Set the 'internal.initial' bit stream for the utf-8 multi-byte encoding.
     88        std::string gs_initial = symgen.get("internal.initial");
     89        m_name_map.insert(make_pair("internal.initial", gs_initial));
     90        PabloE * u8single = new Var(m_name_map.find("UTF8-SingleByte")->second);
     91        PabloE * u8pfx2 = new Var(m_name_map.find("UTF8-Prefix2")->second);
     92        PabloE * u8pfx3 = new Var(m_name_map.find("UTF8-Prefix3")->second);
     93        PabloE * u8pfx4 = new Var(m_name_map.find("UTF8-Prefix4")->second);
     94        PabloE * u8pfx = new Or(new Or(u8pfx2, u8pfx3), u8pfx4);
     95        cg_state.stmtsl.push_back(new Assign(gs_initial, new Or(u8pfx, u8single)));
     96        cg_state.newsym = gs_initial;
     97
     98        //Set the 'internal.nonfinal' bit stream for the utf-8 multi-byte encoding.
     99        cg_state.newsym = gs_m0;
     100        std::string gs_nonfinal = symgen.get("internal.nonfinal");
     101        m_name_map.insert(make_pair("internal.nonfinal", gs_nonfinal));
     102        //#define USE_IF_FOR_NONFINAL
     103        #ifdef USE_IF_FOR_NONFINAL
     104        cg_state.stmtsl.push_back(new Assign(gs_nonfinal, new All(0)));
     105        #endif
     106        PabloE * u8scope32 = new Advance(u8pfx3);
     107        PabloE * u8scope42 = new Advance(u8pfx4);
     108        PabloE * u8scope43 = new Advance(u8scope42);
     109        PabloS * assign_non_final = new Assign(gs_nonfinal, new Or(new Or(u8pfx, u8scope32), new Or(u8scope42, u8scope43)));
     110        #ifdef USE_IF_FOR_NONFINAL
     111        std::list<PabloS *> * if_body = new std::list<PabloS *> ();
     112        if_body->push_back(assign_non_final);
     113        cg_state.stmtsl.push_back(new If(u8pfx, *if_body));
     114        #else
     115        cg_state.stmtsl.push_back(assign_non_final);
     116        #endif
     117        cg_state.newsym = gs_nonfinal;
     118    }
     119
     120    cg_state.newsym = gs_m0;
     121    compile(re, cg_state);
     122
     123    //These three lines are specifically for grep.
     124    std::string gs_retVal = symgen.get("marker");
     125    cg_state.stmtsl.push_back(new Assign(gs_retVal, new And(new MatchStar(new Var(cg_state.newsym),
     126        new Not(new Var(m_name_map.find("LineFeed")->second))), new Var(m_name_map.find("LineFeed")->second))));
     127    cg_state.newsym = gs_retVal;
     128
     129    return cg_state;
     130}
     131
     132void RE_Compiler::compile(RE * re, CodeGenState & cg_state) {
     133    if (Name * name = dyn_cast<Name>(re)) {
     134        compile(name, cg_state);
     135    }
     136    else if (Seq* seq = dyn_cast<Seq>(re)) {
     137        compile(seq, cg_state);
     138    }
     139    else if (Alt * alt = dyn_cast<Alt>(re)) {
     140        compile(alt, cg_state);
     141    }
     142    else if (Rep * rep = dyn_cast<Rep>(re)) {
     143        compile(rep, cg_state);
     144    }
     145    else if (isa<Start>(re)) {
     146        std::string gs_retVal = symgen.get("sol");
     147        cg_state.stmtsl.push_back(new Assign(gs_retVal, new And(new Var(cg_state.newsym), new Not(new Advance(new Not(new CharClass(m_name_map.find("LineFeed")->second)))))));
     148        cg_state.newsym = gs_retVal;
     149    }
     150    else if (isa<End>(re)) {
     151        std::string gs_retVal = symgen.get("eol");
     152        cg_state.stmtsl.push_back(new Assign(gs_retVal, new And(new Var(cg_state.newsym), new CharClass(m_name_map.find("LineFeed")->second))));
     153        cg_state.newsym = gs_retVal;
     154    }
     155}
     156
     157inline void RE_Compiler::compile(Name * name, CodeGenState & cg_state) {
     158    std::string gs_retVal = symgen.get("marker");
     159    PabloE * markerExpr = new Var(cg_state.newsym);
     160    if (name->getType() != Name::Type::FixedLength) {
     161        // Move the markers forward through any nonfinal UTF-8 bytes to the final position of each character.
     162        markerExpr = new And(markerExpr, new CharClass(m_name_map.find("internal.initial")->second));
     163        markerExpr = new ScanThru(markerExpr, new CharClass(m_name_map.find("internal.nonfinal")->second));
     164    }
     165    PabloE * ccExpr;
     166    if (name->getType() == Name::Type::UnicodeCategory) {
     167        ccExpr = new Call(name->getName());
     168    }
     169    else {
     170        ccExpr = new CharClass(name->getName());
     171    }
     172    if (name->isNegated()) {
     173        ccExpr = new Not(new Or(new Or(ccExpr, new CharClass(m_name_map.find("LineFeed")->second)),
     174                                new CharClass(m_name_map.find("internal.nonfinal")->second)));
     175    }
     176    cg_state.stmtsl.push_back(new Assign(gs_retVal, new Advance(new And(ccExpr, markerExpr))));
     177    cg_state.newsym = gs_retVal;
     178}
     179
     180inline void RE_Compiler::compile(Seq * seq, CodeGenState & cg_state) {
     181    for (RE * re : *seq) {
     182        compile(re, cg_state);
     183    }
     184}
     185
     186inline void RE_Compiler::compile(Alt * alt, CodeGenState & cg_state) {
     187    if (alt->empty()) {
     188        std::string gs_retVal = symgen.get("always_fail_marker");
     189        cg_state.stmtsl.push_back(new Assign(gs_retVal, new All(0)));
     190        cg_state.newsym = gs_retVal;
     191    }
     192    else {
     193        auto i = alt->begin();
     194        const std::string startsym = cg_state.newsym;
     195        compile(*i, cg_state);
     196        while (++i != alt->end()) {
     197            std::string oldsym = cg_state.newsym;
     198            cg_state.newsym = startsym;
     199            compile(*i, cg_state);
     200            std::string altsym = symgen.get("alt");
     201            cg_state.stmtsl.push_back(new Assign(altsym, new Or(new Var(oldsym), new Var(cg_state.newsym))));
     202            cg_state.newsym = altsym;
     203        }
     204    }
     205}
     206
     207inline void RE_Compiler::compile(Rep * rep, CodeGenState & cg_state) {
     208    if (isa<Name>(rep->getRE()) && (rep->getLB() == 0) && (rep->getUB()== Rep::UNBOUNDED_REP)) {
     209        Name * rep_name = dyn_cast<Name>(rep->getRE());
     210        std::string gs_retVal = symgen.get("marker");
     211
     212        PabloE* ccExpr;
     213        if (rep_name->getType() == Name::Type::UnicodeCategory) {
     214            ccExpr = new Call(rep_name->getName());
     215        }
     216        else {
     217            ccExpr = new CharClass(rep_name->getName());
     218        }
     219
     220        if (rep_name->isNegated()) {
     221            ccExpr = new Not(new Or(new Or(ccExpr, new CharClass(m_name_map.find("LineFeed")->second)), new CharClass(m_name_map.find("internal.nonfinal")->second)));
     222        }
     223        if (rep_name->getType() == Name::Type::FixedLength) {
     224            cg_state.stmtsl.push_back(new Assign(gs_retVal, new MatchStar(new Var(cg_state.newsym), ccExpr)));
     225        }
     226        else { // Name::Unicode and Name::UnicodeCategory
     227            cg_state.stmtsl.push_back(new Assign(gs_retVal,
     228                new And(new MatchStar(new Var(cg_state.newsym),
     229                        new Or(new CharClass(m_name_map.find("internal.nonfinal")->second), ccExpr)),
     230                               new CharClass(m_name_map.find("internal.initial")->second))));
     231        }
     232        cg_state.newsym = gs_retVal;
     233    }
     234    else if (rep->getUB() == Rep::UNBOUNDED_REP) {
     235        compileUnboundedRep(rep->getRE(), rep->getLB(), cg_state);
     236    }
     237    else { // if (rep->getUB() != Rep::UNBOUNDED_REP)
     238        compileBoundedRep(rep->getRE(), rep->getLB(), rep->getUB(), cg_state);
     239    }
     240}
     241
     242inline void RE_Compiler::compileUnboundedRep(RE * repeated, int lb, CodeGenState & cg_state) {
     243    for (; lb; --lb) {
     244        compile(repeated, cg_state);
     245    }
     246    std::string while_test = symgen.get("while_test");
     247    std::string while_accum = symgen.get("while_accum");
     248    CodeGenState while_test_state;
     249    while_test_state.newsym = while_test;
     250    compile(repeated, while_test_state);
     251    cg_state.stmtsl.push_back(new Assign(while_test, new Var(cg_state.newsym)));
     252    cg_state.stmtsl.push_back(new Assign(while_accum, new Var(cg_state.newsym)));
     253    while_test_state.stmtsl.push_back(new Assign(while_test, new And(new Var(while_test_state.newsym), new Not(new Var(while_accum)))));
     254    while_test_state.stmtsl.push_back(new Assign(while_accum, new Or(new Var(while_accum), new Var(while_test_state.newsym))));
     255    cg_state.stmtsl.push_back(new While(new Var(while_test), while_test_state.stmtsl));
     256    cg_state.newsym = while_accum;
     257}
     258
     259inline void RE_Compiler::compileBoundedRep(RE * repeated, int lb, int ub, CodeGenState & cg_state) {
     260    ub -= lb;
     261    for (; lb; --lb) {
     262        compile(repeated, cg_state);
     263    }
     264    if (ub > 0) {
     265         std::string oldsym = cg_state.newsym;
     266         compile(repeated, cg_state);
     267         compileBoundedRep(repeated, 0, ub - 1, cg_state);
     268         std::string altsym = symgen.get("alt");
     269         cg_state.stmtsl.push_back(new Assign(altsym, new Or(new Var(oldsym), new Var(cg_state.newsym))));
     270         cg_state.newsym = altsym;
     271    }
     272}
     273
     274
     275bool RE_Compiler::hasUnicode(const RE * re) {
     276    bool found = false;
     277    if (re == nullptr) {
     278        throw std::runtime_error("Unexpected Null Value passed to RE Compiler!");
     279    }
     280    else if (const Name * name = dyn_cast<const Name>(re)) {
     281        if ((name->getType() == Name::Type::UnicodeCategory) || (name->getType() == Name::Type::Unicode)) {
     282            found = true;
     283        }
     284    }
     285    else if (const Seq * re_seq = dyn_cast<const Seq>(re)) {
     286        for (auto i = re_seq->cbegin(); i != re_seq->cend(); ++i) {
     287            if (hasUnicode(*i)) {
     288                found = true;
     289                break;
     290            }
     291        }
     292    }
     293    else if (const Alt * re_alt = dyn_cast<const Alt>(re)) {
     294        for (auto i = re_alt->cbegin(); i != re_alt->cend(); ++i) {
     295            if (hasUnicode(*i)) {
     296                found = true;
     297                break;
     298            }
     299        }
     300    }
     301    else if (const Rep * rep = dyn_cast<const Rep>(re)) {
     302        found = hasUnicode(rep->getRE());
     303    }
     304    return found;
     305}
     306
     307} // end of namespace re
  • icGREP/icgrep-devel/icgrep/re/re_compiler.h

    r4194 r4197  
    55 */
    66
    7 #ifndef RE_COMPILER_H
    8 #define RE_COMPILER_H
     7#ifndef RE_TO_PABLO_COMPILER_H
     8#define RE_TO_PABLO_COMPILER_H
    99
     10//Regular Expressions
    1011#include "re_re.h"
    11 #include "../utf_encoding.h"
    12 #include "../llvm_gen.h"
     12//Pablo Statements
     13#include "../ps_pablos.h"
     14//Code Generation
     15#include "symbol_generator.h"
     16
    1317#include <string>
     18#include <list>
     19#include <map>
    1420
    1521namespace re {
     22class RE_Compiler {
     23public:
     24    RE_Compiler(std::map<std::string, std::string> name_map);
     25    CodeGenState compile(RE *re);
     26    CodeGenState compile_subexpressions(const std::map<std::string, RE*>& re_map);
     27private:
     28    void compile(RE * re, CodeGenState & cg_state);
     29    void compile(Alt * alt, CodeGenState & cg_state);
     30    void compile(Seq * seq, CodeGenState & cg_state);
     31    void compile(Rep * rep, CodeGenState & cg_state);
     32    void compileUnboundedRep(RE * repeated, int lb, CodeGenState  & cg_state);
     33    void compileBoundedRep(RE * repeated, int lb, int ub, CodeGenState &cg_state);
     34    void compile(Name * name, CodeGenState & cg_state);
    1635
    17 struct processed_parsetree_results{
    18     RE* re;
    19     std::string remaining;
    20 };
    2136
    22 class RE_Compiler
    23 {
    24 public:
    25     RE_Compiler();
    26     LLVM_Gen_RetVal compile(bool show_compile_time,
    27                             bool ascii_only,
    28                             std::string basis_pattern,
    29                             std::string gensym_pattern,
    30                             UTF_Encoding encoding ,
    31                             std::string input_string);
     37
     38    static bool hasUnicode(const RE *re);
     39
     40    SymbolGenerator symgen;
     41    std::map<std::string, std::string> m_name_map;
    3242};
    3343
    3444}
    3545
    36 #endif // RE_COMPILER_H
     46#endif // RE_TO_PABLO_COMPILER_H
  • icGREP/icgrep-devel/icgrep/re/re_nullable.cpp

    r4194 r4197  
    1919RE * RE_Nullable::removeNullablePrefix(RE * re) {
    2020    if (Seq * seq = dyn_cast<Seq>(re)) {
    21         re = removeNullableSeqPrefix(seq);
     21        re = removeNullablePrefix(seq);
    2222    }
    2323    else if (Alt * alt = dyn_cast<Alt>(re)) {
     
    3333        else if (hasNullablePrefix(rep->getRE())) {
    3434            Seq * seq = makeSeq();
    35             seq->push_back(removeNullablePrefix(rep->getRE()));
     35            seq->push_back(removeNullablePrefix(rep->getRE()->clone()));
    3636            seq->push_back(makeRep(rep->getRE(), rep->getLB() - 1, rep->getLB() - 1));
     37            rep->setRE(nullptr);
     38            delete rep;
    3739            re = RE_Simplifier::simplify(seq);
    3840        }
     
    4446}
    4547
    46 inline Seq * RE_Nullable::removeNullableSeqPrefix(const Seq * seq) {
    47     Seq * new_seq = makeSeq(seq->getType());
     48inline Seq * RE_Nullable::removeNullablePrefix(Seq * seq) {
    4849    if (!seq->empty()) {
     50        std::vector<RE *> list;
    4951        auto i = seq->begin();
    5052        // find the first non-nullable prefix
    5153        while (i != seq->end() && isNullable(*i)) {
     54            delete *i;
    5255            ++i;
    5356        }
    54         if (i == seq->end()) {
    55             return new_seq;
     57        if (i != seq->end()) {
     58            // push the first non-nullable seq item to the front of the new_seq
     59            list.push_back(removeNullablePrefix(*i));
     60            std::copy(++i, seq->end(), std::back_inserter(list));
    5661        }
    57         // push the first non-nullable seq item to the front of the new_seq
    58         new_seq->push_back(removeNullablePrefix(*i));
    59         std::copy(++i, seq->end(), std::back_inserter(*new_seq));
     62        seq->swap(list);
    6063    }
    61     return new_seq;
     64    return seq;
    6265}
    6366
    6467RE * RE_Nullable::removeNullableSuffix(RE * re) {
    6568    if (Seq * seq = dyn_cast<Seq>(re)) {
    66         re = removeNullableSeqSuffix(seq);
     69        re = removeNullableSuffix(seq);
    6770    }
    6871    else if (Alt* alt = dyn_cast<Alt>(re)) {
     
    8083            seq->push_back(RE_Simplifier::simplify(makeRep(rep->getRE()->clone(), rep->getLB() - 1, rep->getLB() - 1)));
    8184            seq->push_back(removeNullableSuffix(rep->getRE()));
     85            rep->setRE(nullptr);
    8286            delete rep;
    8387            re = RE_Simplifier::simplify(seq);
     
    9094}
    9195
    92 inline Seq * RE_Nullable::removeNullableSeqSuffix(const Seq * seq) {
    93     Seq * new_seq = makeSeq(seq->getType());
     96inline Seq * RE_Nullable::removeNullableSuffix(Seq * seq) {
    9497    if (!seq->empty()) {
     98        std::vector<RE *> list;
    9599        auto i = seq->end();
    96100        // find the last non-nullable suffix
    97         while (i != seq->begin() && isNullable(*--i));
    98 
     101        while (i != seq->begin() && isNullable(*--i)) {
     102            delete *i;
     103        }
    99104        if (i != seq->begin()) {
    100             std::copy(seq->begin(), i, std::back_inserter(*new_seq));
    101             new_seq->push_back(removeNullableSuffix(*i));
     105            std::copy(seq->begin(), i, std::back_inserter(list));
     106            list.push_back(removeNullableSuffix(*i));
    102107        }
     108        seq->swap(list);
    103109    }
    104     return new_seq;
     110    return seq;
    105111}
    106112
    107113bool RE_Nullable::isNullable(const RE * re) {
    108114    if (const Seq * re_seq = dyn_cast<const Seq>(re)) {
    109         return isNullableVector(re_seq);
     115        return isNullable(re_seq);
    110116    }
    111117    else if (const Alt* re_alt = dyn_cast<const Alt>(re)) {
    112         return isNullableVector(re_alt);
     118        return isNullable(re_alt);
    113119    }
    114120    else if (const Rep* re_rep = dyn_cast<const Rep>(re)) {
     
    118124}
    119125
    120 inline bool RE_Nullable::isNullableVector(const Vector * vec) {
     126inline bool RE_Nullable::isNullable(const Vector * vec) {
    121127    for (const RE * re : *vec) {
    122128        if (!isNullable(re)) {
  • icGREP/icgrep-devel/icgrep/re/re_nullable.h

    r4194 r4197  
    1111class RE_Nullable {
    1212public:
    13     static RE* removeNullablePrefix(RE* re);
    14     static RE* removeNullableSuffix(RE* re);
     13    static RE * removeNullablePrefix(RE * re);
     14    static RE * removeNullableSuffix(RE * re);
    1515private:
    1616    static bool isNullable(const RE * re);
    17     static bool isNullableVector(const Vector * vec);
    18     static bool hasNullablePrefix(const RE *re);
     17    static bool isNullable(const Vector * vec);
     18    static bool hasNullablePrefix(const RE * re);
    1919    static bool hasNullableSuffix(const RE * re);
    20     static Seq * removeNullableSeqPrefix(const Seq * seq);
    21     static Seq * removeNullableSeqSuffix(const Seq *seq);
     20    static Seq * removeNullablePrefix(Seq * seq);
     21    static Seq * removeNullableSuffix(Seq * seq);
    2222};
    2323
  • icGREP/icgrep-devel/icgrep/re/symbol_generator.cpp

    r4196 r4197  
    77#include "symbol_generator.h"
    88
    9 SymbolGenerator::SymbolGenerator(){
    10         pfxmap = new std::map<std::string, int>();
    11         //std::cout << "pfxmap initialized" << std::endl;
     9namespace re {
     10
     11SymbolGenerator::SymbolGenerator()
     12: pfxmap()
     13{
    1214}
    1315
    14 std::string SymbolGenerator::gensym(std::string prefix)
    15 {
     16std::string SymbolGenerator::get(std::string prefix) {
     17    auto f = pfxmap.find(prefix);
     18    unsigned count = 0;
     19    if (f == pfxmap.end()) {
     20        pfxmap.insert(std::make_pair(prefix, 1));
     21    }
     22    else {
     23        count = f->second++;
     24    }
     25    return prefix + std::to_string(count);
     26}
    1627
    17     std::pair<std::map<std::string, int>::iterator, bool> ret = pfxmap->insert(make_pair(prefix, 0));
    18     std::string sym;
    19     if (ret.second)
    20     {
    21         //The insertion succeeded
    22         sym = prefix + std::to_string(0);
    23         //std::cout << sym << " created" << std::endl;
    24         return sym;
    25     }
    26     else
    27     {
    28         //The insertion failed so we know that the prefix has already been added.
    29         std::map<std::string, int>::iterator iter = pfxmap->find(prefix);
    30         iter->second++;
    31 
    32         sym = prefix + std::to_string(iter->second);
    33         //std::cout << sym << " created" << std::endl;
    34         return sym;
    35        
    36     }
    3728}
  • icGREP/icgrep-devel/icgrep/re/symbol_generator.h

    r4196 r4197  
    88#define SYMBOL_GENERATOR_H
    99
    10 #include <iostream>
    1110#include <string>
    12 #include <sstream>
    13 #include <utility>
    1411#include <map>
    1512
     13namespace re {
    1614
    1715class SymbolGenerator
     
    1917public:
    2018    SymbolGenerator();
    21     std::string gensym(std::string prefix);
     19    std::string get(std::string prefix);
    2220private:
    23     std::map<std::string, int>* pfxmap;
     21    std::map<std::string, unsigned> pfxmap;
    2422};
    2523
     24}
     25
    2626#endif // SYMBOL_GENERATOR_H
Note: See TracChangeset for help on using the changeset viewer.