Changeset 4210


Ignore:
Timestamp:
Oct 4, 2014, 1:14:35 PM (5 years ago)
Author:
nmedfort
Message:

First stage of code generator revamp

Location:
icGREP/icgrep-devel/icgrep
Files:
11 edited
2 moved

Legend:

Unmodified
Added
Removed
  • icGREP/icgrep-devel/icgrep/CMakeLists.txt

    r4209 r4210  
    4848
    4949# Check if boost has been installed on this system.
    50 set(Boost_USE_STATIC_LIBS ON)
    51 set(Boost_USE_MULTITHREADED OFF) 
    52 set(Boost_USE_STATIC_RUNTIME OFF)
    53 include(FindBoost)
    54 
    55 add_library(PabloADT pablo/pe_advance.cpp pablo/pe_all.cpp pablo/pe_and.cpp pablo/pe_call.cpp pablo/pe_charclass.cpp  pablo/pe_matchstar.cpp pablo/pe_scanthru.cpp pablo/pe_not.cpp  pablo/pe_or.cpp  pablo/pe_pabloe.cpp  pablo/pe_sel.cpp  pablo/pe_var.cpp  pablo/pe_xor.cpp pablo/ps_assign.cpp  pablo/ps_if.cpp  pablo/codegenstate.cpp  pablo/ps_while.cpp pablo/printer_pablos.cpp)
    56 
    57 add_library(RegExpADT re/re_alt.cpp re/re_cc.cpp re/re_end.cpp re/re_name.cpp re/re_parser.cpp re/re_re.cpp re/re_rep.cpp re/re_seq.cpp re/re_start.cpp re/parsefailure.cpp re/re_reducer.cpp re/re_nullable.cpp re/re_simplifier.cpp re/re_compiler.cpp re/symbol_generator.cpp re/printer_re.cpp)
    58 
     50set(Boost_USE_STATIC_LIBS ON)
     51set(Boost_USE_MULTITHREADED OFF)
     52set(Boost_USE_STATIC_RUNTIME OFF)
     53find_package(Boost 1.21 COMPONENTS system)
     54include_directories("${Boost_INCLUDE_DIRS}")
     55link_directories(${Boost_LIBRARY_DIR})
     56add_library(PabloADT pablo/pe_advance.cpp pablo/pe_all.cpp pablo/pe_and.cpp pablo/pe_call.cpp pablo/pe_charclass.cpp  pablo/pe_matchstar.cpp pablo/pe_scanthru.cpp pablo/pe_not.cpp  pablo/pe_or.cpp  pablo/pe_pabloe.cpp  pablo/pe_sel.cpp  pablo/pe_var.cpp  pablo/pe_xor.cpp pablo/ps_assign.cpp  pablo/ps_if.cpp  pablo/codegenstate.cpp  pablo/symbol_generator.cpp pablo/ps_while.cpp pablo/printer_pablos.cpp)
     57add_library(RegExpADT re/re_alt.cpp re/re_cc.cpp re/re_end.cpp re/re_name.cpp re/re_parser.cpp re/re_re.cpp re/re_rep.cpp re/re_seq.cpp re/re_start.cpp re/parsefailure.cpp re/re_reducer.cpp re/re_nullable.cpp re/re_simplifier.cpp re/re_compiler.cpp re/printer_re.cpp)
    5958add_library(CCADT cc/cc_compiler.cpp utf_encoding.cpp utf8_encoder.cpp unicode_categories.h unicode_categories-flat.h unicode_categories-simple.h)
    6059
     
    6766include_directories("${PROJECT_SOURCE_DIR}/include/simd-lib/idisa_cpp")
    6867
    69 if (Boost_FOUND)
    70     include_directories(${Boost_INCLUDE_DIRS})
    71 endif()
    72 
    7368# add the executable
    7469add_executable(icgrep icgrep.cpp llvm_gen.cpp llvm_gen_helper.cpp compiler.cpp)
     
    7671target_link_libraries (CCADT PabloADT)
    7772target_link_libraries (icgrep PabloADT RegExpADT CCADT ${REQ_LLVM_LIBRARIES})
    78 if (Boost_FOUND)
    79     target_link_libraries (icgrep ${Boost_LIBRARIES})
    80 endif()
    81 
     73IF (Boost_FOUND)
     74    SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DUSE_BOOST")
     75    target_link_libraries (CCADT ${Boost_LIBRARIES})
     76    target_link_libraries (PabloADT ${Boost_LIBRARIES})
     77    target_link_libraries (RegExpADT ${Boost_LIBRARIES})
     78ENDIF()
    8279
    8380#Check compiler support for 0x / 11
     
    103100endif()
    104101
     102
    105103#Disable RunTime Type Information
    106 
    107104IF (MSVC) # using Visual Studio C++
    108105  SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /GR-")
     
    210207set(CMAKE_REQUIRED_FLAGS)
    211208set(SIMD_SUPPORT_FOUND)
    212 
    213 MACRO(CHECK_SIMD_SUPPORT type) 
     209set(SIMD_MAX_BLOCK_SIZE)
     210
     211MACRO(CHECK_SIMD_SUPPORT type maxBlockSize)
    214212  IF(NOT SIMD_SUPPORT_FOUND) 
    215213    set(CMAKE_REQUIRED_FLAGS_SAVE ${CMAKE_REQUIRED_FLAGS})
     
    218216    IF(${type})
    219217      SET(SIMD_SUPPORT_FOUND ${CMAKE_REQUIRED_FLAGS})
     218      SET(SIMD_MAX_BLOCK_SIZE, ${maxBlockSize})
    220219    ENDIF()
    221220    SET(CMAKE_REQUIRED_FLAGS ${CMAKE_REQUIRED_FLAGS_SAVE})
     
    223222ENDMACRO()
    224223
    225 CHECK_SIMD_SUPPORT("AVX_2")
    226 CHECK_SIMD_SUPPORT("AVX_1")
    227 CHECK_SIMD_SUPPORT("SSE4_2")
    228 CHECK_SIMD_SUPPORT("SSE4_1")
    229 CHECK_SIMD_SUPPORT("SSE3")
    230 CHECK_SIMD_SUPPORT("SSE2")
    231 CHECK_SIMD_SUPPORT("SSE1")
     224CHECK_SIMD_SUPPORT("AVX_2" 256)
     225CHECK_SIMD_SUPPORT("AVX_1" 128)
     226CHECK_SIMD_SUPPORT("SSE4_2" 128)
     227CHECK_SIMD_SUPPORT("SSE4_1" 128)
     228CHECK_SIMD_SUPPORT("SSE3" 128)
     229CHECK_SIMD_SUPPORT("SSE2" 128)
     230CHECK_SIMD_SUPPORT("SSE1" 128)
    232231
    233232SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${SIMD_SUPPORT_FOUND}")
     
    241240
    242241IF (BLOCK_SIZE_256)
    243   IF("${SIMD_SUPPORT_FOUND}" STREQUAL "${ENABLE_AVX_2}")
     242  IF(${SIMD_MAX_BLOCK_SIZE} >= 256)
    244243    SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DUSE_S2P_AVX2 -DBLOCK_SIZE=256 -march=core-avx2 -m64")
    245244  ELSE()
    246     MESSAGE(FATAL_ERROR "AVX2 is required for BLOCK_SIZE=256")
     245    MESSAGE(FATAL_ERROR "AVX2 or better is required for BLOCK_SIZE=256")
    247246  ENDIF()
    248247ENDIF()
  • icGREP/icgrep-devel/icgrep/cc/cc_compiler.cpp

    r4209 r4210  
    3030#include <re/re_seq.h>
    3131#include <re/re_rep.h>
     32#include <re/re_name.h>
    3233
    3334#include <utility>
     
    4647namespace cc {
    4748
    48 CC_Compiler::CC_Compiler(const Encoding encoding, const std::string basis_pattern, const std::string gensym_pattern)
    49 : mEncoding(encoding)
     49CC_Compiler::CC_Compiler(CodeGenState & cg, const Encoding encoding, const std::string basis_pattern, const std::string gensym_pattern)
     50: mCG(cg)
     51, mEncoding(encoding)
    5052, mGenSymPattern(gensym_pattern)
    5153, mGenSymCounter(0)
     
    6264}
    6365
    64 
    65 void CC_Compiler::add_predefined(std::string key_value, Expression* mapped_value)
    66 {
     66void CC_Compiler::compile(const REMap & re_map) {
     67    process_re_map(re_map);
     68    for (auto i =  re_map.rbegin(); i != re_map.rend(); ++i) {
     69        //This is specifically for the utf8 multibyte character classes.
     70        if (Seq * seq = dyn_cast<Seq>(i->second)) {
     71            if (seq->getType() == Seq::Type::Byte) {
     72                auto j = seq->begin();
     73                while (true) {
     74                    Name * name = dyn_cast<Name>(*j);
     75                    assert (name);
     76                    CharClass * cc_mask = makeCharClass(name->getName());
     77                    if (++j != seq->end()) {
     78                        mCG.push_back(makeAssign(mCG.symgen("marker"), makeAdvance(cc_mask)));
     79                    }
     80                    else {
     81                        mCG.push_back(makeAssign(seq->getName(), cc_mask));
     82                        break;
     83                    }
     84                }
     85            }
     86        }
     87    }
     88}
     89
     90inline void CC_Compiler::add_predefined(std::string key_value, Expression* mapped_value) {
    6791    mCommon_Expression_Map.insert(make_pair(key_value, mapped_value));
    6892}
     
    7195{   
    7296    //Add the new mapping to the list of pablo statements:
    73     mStmtsl.push_back(makeAssign(varname, expr->pablo_expr));
     97    mCG.push_back(makeAssign(varname, expr->pablo_expr));
    7498
    7599    //Add the new mapping to the common expression map:
     
    92116        return add_assignment(mGenSymPattern + std::to_string(++mGenSymCounter), expr);
    93117    }
    94 }
    95 
    96 CC_Compiler::List CC_Compiler::get_compiled()
    97 {
    98     return mStmtsl;
    99 }
    100 
    101 void CC_Compiler::compile_from_map(const REMap &re_map)
    102 {
    103     process_re_map(re_map);
    104118}
    105119
  • icGREP/icgrep-devel/icgrep/cc/cc_compiler.h

    r4209 r4210  
    3030    typedef std::list<pablo::PabloE *>          List;
    3131public:
    32     CC_Compiler(const Encoding encoding, const std::string basis_pattern = "basis", const std::string gensym_pattern = "temp");
    33     void compile_from_map(const REMap & re_map);
    34     List get_compiled();
     32
     33    CC_Compiler(pablo::CodeGenState & cg, const Encoding encoding, const std::string basis_pattern = "basis", const std::string gensym_pattern = "temp");
     34
     35    void compile(const REMap & re_map);
    3536
    3637    const std::string getBasisPattern() const {
     
    5758    Expression* expr_to_variable(Expression* cgo);
    5859
    59 
    60     Encoding                    mEncoding;
     60    pablo::CodeGenState &       mCG;
     61    const Encoding              mEncoding;
    6162    const std::string           mBasisPattern;
    6263    const std::string           mGenSymPattern;
    6364    int                         mGenSymCounter;
    64     List                        mStmtsl;
    6565    ExpressionMap               mCommon_Expression_Map;
    6666};
  • icGREP/icgrep-devel/icgrep/compiler.cpp

    r4209 r4210  
    125125
    126126
    127     CC_Compiler cc_compiler(encoding);
    128     cc_compiler.compile_from_map(re_map);
    129     auto cc_stmtsl = cc_compiler.get_compiled();
     127    SymbolGenerator symgen;
     128    CodeGenState cg(symgen);
     129
     130    CC_Compiler cc_compiler(cg, encoding);
     131    cc_compiler.compile(re_map);
    130132    #ifdef DEBUG_PRINT_PBIX_AST
    131133    //Print to the terminal the AST that was generated by the character class compiler.
    132     std::cerr << "Pablo CC AST:" << std::endl << StatementPrinter::Print_CC_PabloStmts(cc_stmtsl) << std::endl;
     134    std::cerr << "Pablo CC AST:" << std::endl << StatementPrinter::Print_CC_PabloStmts(cg.expressions()) << std::endl;
     135    //Print to the terminal the AST that was generated for the re subexpressions.
     136    std::cerr << "Subexpressions:" << StatementPrinter::PrintStmts(cg) << std::endl;
    133137    #endif
    134138
    135     RE_Compiler pbix_compiler(name_map);
    136     CodeGenState re_subexpression_cg_state = pbix_compiler.compile_subexpressions(re_map);
    137     #ifdef DEBUG_PRINT_PBIX_AST
    138     //Print to the terminal the AST that was generated for the re subexpressions.
    139     std::cerr << "Subexpressions:" << StatementPrinter::PrintStmts(re_subexpression_cg_state) << std::endl;
    140     #endif
    141 
    142     CodeGenState re_cg_state = pbix_compiler.compile(re_ast);
     139    RE_Compiler pbix_compiler(cg, name_map);
     140    pbix_compiler.compile(re_ast);
    143141    #ifdef DEBUG_PRINT_PBIX_AST
    144142    //Print to the terminal the AST that was generated by the parallel bit-stream compiler.
    145     std::cerr << "Final Pablo AST:" << StatementPrinter::PrintStmts(re_cg_state) << ")" << std::endl;
     143    std::cerr << "Final Pablo AST:" << StatementPrinter::PrintStmts(cg) << ")" << std::endl;
    146144    //Print a count of the Pablo statements and expressions that are contained in the AST from the pbix compiler.
    147145    // std::cerr << "Pablo Statement Count: " << Pbix_Counter::Count_PabloStatements(re_cg_state.stmtsl) << std::endl;
     
    149147
    150148    LLVM_Generator irgen(name_map, cc_compiler.getBasisPattern(), encoding.getBits());
    151 
    152149    unsigned long long cycles = 0;
    153150    double timer = 0;
     
    158155    }
    159156
    160     LLVM_Gen_RetVal retVal = irgen.Generate_LLVMIR(re_cg_state, re_subexpression_cg_state, cc_stmtsl);
     157    LLVM_Gen_RetVal retVal = irgen.Generate_LLVMIR(cg);
    161158    if (show_compile_time)
    162159    {
  • icGREP/icgrep-devel/icgrep/llvm_gen.cpp

    r4207 r4210  
    367367}
    368368
    369 LLVM_Gen_RetVal LLVM_Generator::Generate_LLVMIR(CodeGenState cg_state, CodeGenState subexpression_cg_state, List cc_cgo_stmtsl)
     369LLVM_Gen_RetVal LLVM_Generator::Generate_LLVMIR(const CodeGenState & cg_state)
    370370{
    371371    //Create the module.
     
    388388    DefineTypes();
    389389    DeclareFunctions();
    390     DeclareCallFunctions(cg_state.stmtsl);
     390    DeclareCallFunctions(cg_state.expressions());
    391391
    392392    Function::arg_iterator args = mFunc_process_block->arg_begin();
     
    400400    //Create the carry queue.
    401401    mCarryQueueIdx = 0;
    402     mCarryQueueSize = LLVM_Generator_Helper::CarryCount_PabloStatements(subexpression_cg_state.stmtsl);
    403     mCarryQueueSize += LLVM_Generator_Helper::CarryCount_PabloStatements(cg_state.stmtsl);
     402    mCarryQueueSize += LLVM_Generator_Helper::CarryCount_PabloStatements(cg_state.expressions());
    404403    /* The following may be needed if carry-generating operations are ever inserted
    405404       by the character class compiler.
     
    423422
    424423    //Generate the IR instructions for the function.
    425 
    426     Generate_PabloStatements(cc_cgo_stmtsl);
    427     Generate_PabloStatements(subexpression_cg_state.stmtsl);
    428     Generate_PabloStatements(cg_state.stmtsl);
     424    Generate_PabloStatements(cg_state.expressions());
    429425    SetReturnMarker(cg_state.newsym, 0);
    430426    SetReturnMarker(m_name_map.find("LineFeed")->second, 1);
  • icGREP/icgrep-devel/icgrep/llvm_gen.h

    r4208 r4210  
    9292    LLVM_Generator(std::map<std::string, std::string> name_map, std::string basis_pattern, int bits);
    9393    ~LLVM_Generator();
    94     LLVM_Gen_RetVal Generate_LLVMIR(CodeGenState cg_state,
    95                                     CodeGenState subexpression_cg_state,
    96                                     List cc_cgo);
     94    LLVM_Gen_RetVal Generate_LLVMIR(const CodeGenState &cg_state);
    9795private:
    9896    void MakeLLVMModule();
  • icGREP/icgrep-devel/icgrep/pablo/codegenstate.h

    r4209 r4210  
    2828#include <vector>
    2929#include <string>
     30#include <pablo/symbol_generator.h>
    3031
    3132namespace pablo {
    3233
    3334struct CodeGenState {
     35
     36    CodeGenState(SymbolGenerator & symgen)
     37    : mSymbolGenerator(symgen)
     38    , mPredecessor(nullptr)
     39    {
     40
     41    }
     42
     43    CodeGenState(CodeGenState & cg)
     44    : mSymbolGenerator(cg.mSymbolGenerator)
     45    , mPredecessor(&cg)
     46    {
     47
     48    }
     49
    3450
    3551//    PabloE * createAll(const bool value);
     
    104120
    105121
     122    inline void push_back(PabloE * expr) {
     123        mExpressions.push_back(expr);
     124    }
    106125
     126    inline std::string symgen(std::string prefix) {
     127        return mSymbolGenerator.get(prefix);
     128    }
    107129
    108     std::list<PabloE *>  stmtsl;
    109     std::string          newsym;
     130    inline const std::list<PabloE *> & expressions() const {
     131        return mExpressions;
     132    }
     133
     134    std::string newsym;
     135private:
     136    std::list<PabloE *>     mExpressions;
     137    SymbolGenerator &       mSymbolGenerator;
     138    CodeGenState * const    mPredecessor;
    110139};
    111140
  • icGREP/icgrep-devel/icgrep/pablo/printer_pablos.cpp

    r4207 r4210  
    4545    strOut += "],[";
    4646
    47     strOut = Print_PB_PabloStmts(cg_state.stmtsl, strOut);
     47    strOut = Print_PB_PabloStmts(cg_state.expressions(), strOut);
    4848
    4949    strOut = strOut.substr(0, strOut.length() - 1);
  • icGREP/icgrep-devel/icgrep/pablo/symbol_generator.cpp

    r4209 r4210  
    55 */
    66
    7 #include "symbol_generator.h"
     7#include <pablo/symbol_generator.h>
    88
    9 namespace re {
     9namespace pablo {
    1010
    1111SymbolGenerator::SymbolGenerator()
  • icGREP/icgrep-devel/icgrep/pablo/symbol_generator.h

    r4209 r4210  
    1111#include <map>
    1212
    13 namespace re {
     13namespace pablo {
    1414
    1515class SymbolGenerator
  • icGREP/icgrep-devel/icgrep/re/re_compiler.cpp

    r4209 r4210  
    4242namespace re {
    4343
    44 RE_Compiler::RE_Compiler(std::map<std::string, std::string> name_map)
    45 : m_name_map(name_map)
    46 , symgen()
     44RE_Compiler::RE_Compiler(CodeGenState & baseCG, std::map<std::string, std::string> name_map)
     45: mBaseCG(baseCG)
     46, m_name_map(name_map)
    4747{
    4848
    4949}
    5050
    51 CodeGenState RE_Compiler::compile_subexpressions(const std::map<std::string, RE*>& re_map)
    52 {
    53     CodeGenState cg_state;
    54     for (auto i =  re_map.rbegin(); i != re_map.rend(); ++i) {
    55         //This is specifically for the utf8 multibyte character classes.
    56         if (Seq * seq = dyn_cast<Seq>(i->second)) {
    57             if (seq->getType() == Seq::Type::Byte) {
    58                 std::string gs_retVal = symgen.get("start_marker");
    59                 cg_state.stmtsl.push_back(makeAssign(gs_retVal, makeAll(1)));
    60                 for (auto j = seq->begin();; ) {
    61                     Name * name = dyn_cast<Name>(*j);
    62                     assert (name);
    63                     auto * cc_mask = makeAnd(makeVar(gs_retVal), makeCharClass(name->getName()));
    64                     if (++j != seq->end()) {
    65                         gs_retVal = symgen.get("marker");
    66                         cg_state.stmtsl.push_back(makeAssign(gs_retVal, makeAdvance(cc_mask)));
    67                     }
    68                     else {
    69                         cg_state.stmtsl.push_back(makeAssign(seq->getName(), cc_mask));
    70                         break;
    71                     }
    72                 }
    73                 cg_state.newsym = gs_retVal;
    74             }
    75         }
    76     }
    77     return cg_state;
    78 }
    79 
    80 CodeGenState RE_Compiler::compile(RE * re)
    81 {
    82     CodeGenState cg_state;
    83 
    84     std::string gs_m0 = symgen.get("start_marker");
    85     cg_state.stmtsl.push_back(makeAssign(gs_m0, makeAll(1)));
     51void RE_Compiler::compile(RE * re) {
     52
     53    std::string gs_m0 = mBaseCG.symgen("start_marker");
     54    mBaseCG.push_back(makeAssign(gs_m0, makeAll(1)));
    8655
    8756    if (hasUnicode(re)) {
    88         cg_state.newsym = gs_m0;
     57        mBaseCG.newsym = gs_m0;
    8958        //Set the 'internal.initial' bit stream for the utf-8 multi-byte encoding.
    90         std::string gs_initial = symgen.get("internal.initial");
     59        std::string gs_initial = mBaseCG.symgen("internal.initial");
    9160        m_name_map.insert(make_pair("internal.initial", gs_initial));
    9261        PabloE * u8single = makeVar(m_name_map.find("UTF8-SingleByte")->second);
     
    9564        PabloE * u8pfx4 = makeVar(m_name_map.find("UTF8-Prefix4")->second);
    9665        PabloE * u8pfx = makeOr(makeOr(u8pfx2, u8pfx3), u8pfx4);
    97         cg_state.stmtsl.push_back(makeAssign(gs_initial, makeOr(u8pfx, u8single)));
    98         cg_state.newsym = gs_initial;
     66        mBaseCG.push_back(makeAssign(gs_initial, makeOr(u8pfx, u8single)));
     67        mBaseCG.newsym = gs_initial;
    9968
    10069        //Set the 'internal.nonfinal' bit stream for the utf-8 multi-byte encoding.
    101         cg_state.newsym = gs_m0;
    102         std::string gs_nonfinal = symgen.get("internal.nonfinal");
     70        mBaseCG.newsym = gs_m0;
     71        std::string gs_nonfinal = mBaseCG.symgen("internal.nonfinal");
    10372        m_name_map.insert(make_pair("internal.nonfinal", gs_nonfinal));
    10473        //#define USE_IF_FOR_NONFINAL
    10574        #ifdef USE_IF_FOR_NONFINAL
    106         cg_state.stmtsl.push_back(make_assign(gs_nonfinal, make_all(0)));
     75        cg.push_back(make_assign(gs_nonfinal, make_all(0)));
    10776        #endif
    10877        PabloE * u8scope32 = makeAdvance(u8pfx3);
     
    11382        std::list<PabloE *> * if_body = new std::list<PabloE *> ();
    11483        if_body->push_back(assign_non_final);
    115         cg_state.stmtsl.push_back(new If(u8pfx, *if_body));
     84        cg.push_back(new If(u8pfx, *if_body));
    11685        #else
    117         cg_state.stmtsl.push_back(assign_non_final);
     86        mBaseCG.push_back(assign_non_final);
    11887        #endif
    119         cg_state.newsym = gs_nonfinal;
    120     }
    121 
    122     cg_state.newsym = gs_m0;
    123     compile(re, cg_state);
     88        mBaseCG.newsym = gs_nonfinal;
     89    }
     90
     91    mBaseCG.newsym = gs_m0;
     92    process(re, mBaseCG);
    12493
    12594    //These three lines are specifically for grep.
    126     std::string gs_retVal = symgen.get("marker");
    127     cg_state.stmtsl.push_back(makeAssign(gs_retVal, makeAnd(makeMatchStar(makeVar(cg_state.newsym),
     95    std::string gs_retVal = mBaseCG.symgen("marker");
     96    mBaseCG.push_back(makeAssign(gs_retVal, makeAnd(makeMatchStar(makeVar(mBaseCG.newsym),
    12897        makeNot(makeVar(m_name_map.find("LineFeed")->second))), makeVar(m_name_map.find("LineFeed")->second))));
    129     cg_state.newsym = gs_retVal;
    130 
    131     return cg_state;
    132 }
    133 
    134 void RE_Compiler::compile(RE * re, CodeGenState & cg_state) {
     98    mBaseCG.newsym = gs_retVal;
     99}
     100
     101void RE_Compiler::process(RE * re, CodeGenState & cg) {
    135102    if (Name * name = dyn_cast<Name>(re)) {
    136         compile(name, cg_state);
     103        process(name, cg);
    137104    }
    138105    else if (Seq* seq = dyn_cast<Seq>(re)) {
    139         compile(seq, cg_state);
     106        process(seq, cg);
    140107    }
    141108    else if (Alt * alt = dyn_cast<Alt>(re)) {
    142         compile(alt, cg_state);
     109        process(alt, cg);
    143110    }
    144111    else if (Rep * rep = dyn_cast<Rep>(re)) {
    145         compile(rep, cg_state);
     112        process(rep, cg);
    146113    }
    147114    else if (isa<Start>(re)) {
    148         std::string gs_retVal = symgen.get("sol");
    149         cg_state.stmtsl.push_back(makeAssign(gs_retVal, makeAnd(makeVar(cg_state.newsym), makeNot(makeAdvance(makeNot(makeCharClass(m_name_map.find("LineFeed")->second)))))));
    150         cg_state.newsym = gs_retVal;
     115        std::string gs_retVal = cg.symgen("sol");
     116        cg.push_back(makeAssign(gs_retVal, makeAnd(makeVar(cg.newsym), makeNot(makeAdvance(makeNot(makeCharClass(m_name_map.find("LineFeed")->second)))))));
     117        cg.newsym = gs_retVal;
    151118    }
    152119    else if (isa<End>(re)) {
    153         std::string gs_retVal = symgen.get("eol");
    154         cg_state.stmtsl.push_back(makeAssign(gs_retVal, makeAnd(makeVar(cg_state.newsym), makeCharClass(m_name_map.find("LineFeed")->second))));
    155         cg_state.newsym = gs_retVal;
    156     }
    157 }
    158 
    159 inline void RE_Compiler::compile(Name * name, CodeGenState & cg_state) {
    160     std::string gs_retVal = symgen.get("marker");
    161     PabloE * markerExpr = makeVar(cg_state.newsym);
     120        std::string gs_retVal = cg.symgen("eol");
     121        cg.push_back(makeAssign(gs_retVal, makeAnd(makeVar(cg.newsym), makeCharClass(m_name_map.find("LineFeed")->second))));
     122        cg.newsym = gs_retVal;
     123    }
     124}
     125
     126inline void RE_Compiler::process(Name * name, CodeGenState & cg) {
     127    std::string gs_retVal = cg.symgen("marker");
     128    PabloE * markerExpr = makeVar(cg.newsym);
    162129    if (name->getType() != Name::Type::FixedLength) {
    163130        // Move the markers forward through any nonfinal UTF-8 bytes to the final position of each character.
     
    176143                                makeCharClass(m_name_map.find("internal.nonfinal")->second)));
    177144    }
    178     cg_state.stmtsl.push_back(makeAssign(gs_retVal, makeAdvance(makeAnd(ccExpr, markerExpr))));
    179     cg_state.newsym = gs_retVal;
    180 }
    181 
    182 inline void RE_Compiler::compile(Seq * seq, CodeGenState & cg_state) {
     145    cg.push_back(makeAssign(gs_retVal, makeAdvance(makeAnd(ccExpr, markerExpr))));
     146    cg.newsym = gs_retVal;
     147}
     148
     149inline void RE_Compiler::process(Seq * seq, CodeGenState & cg) {
    183150    for (RE * re : *seq) {
    184         compile(re, cg_state);
    185     }
    186 }
    187 
    188 inline void RE_Compiler::compile(Alt * alt, CodeGenState & cg_state) {
     151        process(re, cg);
     152    }
     153}
     154
     155inline void RE_Compiler::process(Alt * alt, CodeGenState & cg) {
    189156    if (alt->empty()) {
    190         std::string gs_retVal = symgen.get("always_fail_marker");
    191         cg_state.stmtsl.push_back(makeAssign(gs_retVal, makeAll(0)));
    192         cg_state.newsym = gs_retVal;
     157        std::string gs_retVal = cg.symgen("always_fail_marker");
     158        cg.push_back(makeAssign(gs_retVal, makeAll(0)));
     159        cg.newsym = gs_retVal;
    193160    }
    194161    else {
    195162        auto i = alt->begin();
    196         const std::string startsym = cg_state.newsym;
    197         compile(*i, cg_state);
     163        const std::string startsym = cg.newsym;
     164        process(*i, cg);
    198165        while (++i != alt->end()) {
    199             std::string alt1 = cg_state.newsym;
    200             cg_state.newsym = startsym;
    201             compile(*i, cg_state);
    202             std::string newsym = symgen.get("alt");
    203             cg_state.stmtsl.push_back(makeAssign(newsym, makeOr(makeVar(alt1), makeVar(cg_state.newsym))));
    204             cg_state.newsym = newsym;
    205         }
    206     }
    207 }
    208 
    209 inline void RE_Compiler::compile(Rep * rep, CodeGenState & cg_state) {
     166            std::string alt1 = cg.newsym;
     167            cg.newsym = startsym;
     168            process(*i, cg);
     169            std::string newsym = cg.symgen("alt");
     170            cg.push_back(makeAssign(newsym, makeOr(makeVar(alt1), makeVar(cg.newsym))));
     171            cg.newsym = newsym;
     172        }
     173    }
     174}
     175
     176inline void RE_Compiler::process(Rep * rep, CodeGenState & cg) {
    210177    if (rep->getUB() == Rep::UNBOUNDED_REP) {
    211         compileUnboundedRep(rep->getRE(), rep->getLB(), cg_state);
     178        processUnboundedRep(rep->getRE(), rep->getLB(), cg);
    212179    }
    213180    else { // if (rep->getUB() != Rep::UNBOUNDED_REP)
    214         compileBoundedRep(rep->getRE(), rep->getLB(), rep->getUB(), cg_state);
    215     }
    216 }
    217 
    218 inline void RE_Compiler::compileUnboundedRep(RE * repeated, int lb, CodeGenState & cg_state) {
    219     while (lb > 0) {
    220         compile(repeated, cg_state);
    221         lb--;
     181        processBoundedRep(rep->getRE(), rep->getLB(), rep->getUB(), cg);
     182    }
     183}
     184
     185inline void RE_Compiler::processUnboundedRep(RE * repeated, int lb, CodeGenState & cg) {
     186    while (lb-- != 0) {
     187        process(repeated, cg);
    222188    }
    223189    if (isa<Name>(repeated)) {
    224190        Name * rep_name = dyn_cast<Name>(repeated);
    225         std::string gs_retVal = symgen.get("marker");
     191        std::string gs_retVal = cg.symgen("marker");
    226192
    227193        PabloE* ccExpr;
     
    237203        }
    238204        if (rep_name->getType() == Name::Type::FixedLength) {
    239             cg_state.stmtsl.push_back(makeAssign(gs_retVal, makeMatchStar(makeVar(cg_state.newsym), ccExpr)));
     205            cg.push_back(makeAssign(gs_retVal, makeMatchStar(makeVar(cg.newsym), ccExpr)));
    240206        }
    241207        else { // Name::Unicode and Name::UnicodeCategory
    242             cg_state.stmtsl.push_back(makeAssign(gs_retVal,
    243                 makeAnd(makeMatchStar(makeVar(cg_state.newsym),
     208            cg.push_back(makeAssign(gs_retVal,
     209                makeAnd(makeMatchStar(makeVar(cg.newsym),
    244210                        makeOr(makeCharClass(m_name_map.find("internal.nonfinal")->second), ccExpr)),
    245211                               makeCharClass(m_name_map.find("internal.initial")->second))));
    246212        }
    247         cg_state.newsym = gs_retVal;
    248      
     213        cg.newsym = gs_retVal;
    249214    }
    250215    else {
    251       std::string while_test = symgen.get("while_test");
    252       std::string while_accum = symgen.get("while_accum");
    253       CodeGenState while_test_state;
    254       while_test_state.newsym = while_test;
    255       compile(repeated, while_test_state);
    256       cg_state.stmtsl.push_back(makeAssign(while_test, makeVar(cg_state.newsym)));
    257       cg_state.stmtsl.push_back(makeAssign(while_accum, makeVar(cg_state.newsym)));
    258       while_test_state.stmtsl.push_back(makeAssign(while_test, makeAnd(makeVar(while_test_state.newsym), makeNot(makeVar(while_accum)))));
    259       while_test_state.stmtsl.push_back(makeAssign(while_accum, makeOr(makeVar(while_accum), makeVar(while_test_state.newsym))));
    260       cg_state.stmtsl.push_back(makeWhile(makeVar(while_test), while_test_state.stmtsl));
    261       cg_state.newsym = while_accum;
    262     }
    263 }
    264 
    265 inline void RE_Compiler::compileBoundedRep(RE * repeated, int lb, int ub, CodeGenState & cg_state) {
     216      std::string while_test = cg.symgen("while_test");
     217      std::string while_accum = cg.symgen("while_accum");
     218
     219      CodeGenState wt(cg);
     220
     221      wt.newsym = while_test;
     222      process(repeated, wt);
     223
     224      cg.push_back(makeAssign(while_test, makeVar(cg.newsym)));
     225      cg.push_back(makeAssign(while_accum, makeVar(cg.newsym)));
     226
     227      wt.push_back(makeAssign(while_test, makeAnd(makeVar(wt.newsym), makeNot(makeVar(while_accum)))));
     228      wt.push_back(makeAssign(while_accum, makeOr(makeVar(while_accum), makeVar(wt.newsym))));
     229
     230      cg.push_back(makeWhile(makeVar(while_test), wt.expressions()));
     231      cg.newsym = while_accum;
     232    }
     233}
     234
     235inline void RE_Compiler::processBoundedRep(RE * repeated, int lb, int ub, CodeGenState & cg) {
    266236    ub -= lb;
    267     for (; lb; --lb) {
    268         compile(repeated, cg_state);
     237    while(lb-- != 0) {
     238        process(repeated, cg);
    269239    }
    270240    if (ub > 0) {
    271          std::string oldsym = cg_state.newsym;
    272          compile(repeated, cg_state);
    273          compileBoundedRep(repeated, 0, ub - 1, cg_state);
    274          std::string altsym = symgen.get("alt");
    275          cg_state.stmtsl.push_back(makeAssign(altsym, makeOr(makeVar(oldsym), makeVar(cg_state.newsym))));
    276          cg_state.newsym = altsym;
     241         std::string oldsym = cg.newsym;
     242         process(repeated, cg);
     243         processBoundedRep(repeated, 0, ub - 1, cg);
     244         std::string altsym = cg.symgen("alt");
     245         cg.push_back(makeAssign(altsym, makeOr(makeVar(oldsym), makeVar(cg.newsym))));
     246         cg.newsym = altsym;
    277247    }
    278248}
  • icGREP/icgrep-devel/icgrep/re/re_compiler.h

    r4207 r4210  
    1010#include <pablo/codegenstate.h>
    1111#include <re/re_re.h>
    12 #include <re/symbol_generator.h>
    1312
    1413#include <string>
     
    1918class RE_Compiler {
    2019public:
    21     RE_Compiler(std::map<std::string, std::string> name_map);
    22     pablo::CodeGenState compile(RE *re);
    23     pablo::CodeGenState compile_subexpressions(const std::map<std::string, RE*>& re_map);
     20
     21    RE_Compiler(pablo::CodeGenState & baseCG, std::map<std::string, std::string> name_map);
     22
     23    void compile(RE * re);
     24
    2425private:
    25     void compile(RE * re, pablo::CodeGenState & cg_state);
    26     void compile(Alt * alt, pablo::CodeGenState & cg_state);
    27     void compile(Seq * seq, pablo::CodeGenState & cg_state);
    28     void compile(Rep * rep, pablo::CodeGenState & cg_state);
    29     void compileUnboundedRep(RE * repeated, int lb, pablo::CodeGenState & cg_state);
    30     void compileBoundedRep(RE * repeated, int lb, int ub, pablo::CodeGenState &cg_state);
    31     void compile(Name * name, pablo::CodeGenState & cg_state);
     26    void process(RE * re, pablo::CodeGenState & cg_state);
     27    void process(Alt * alt, pablo::CodeGenState & cg_state);
     28    void process(Seq * seq, pablo::CodeGenState & cg_state);
     29    void process(Rep * rep, pablo::CodeGenState & cg_state);
     30    void processUnboundedRep(RE * repeated, int lb, pablo::CodeGenState & cg_state);
     31    void processBoundedRep(RE * repeated, int lb, int ub, pablo::CodeGenState & cg_state);
     32    void process(Name * name, pablo::CodeGenState & cg_state);
    3233
    3334    static bool hasUnicode(const RE *re);
    3435
    35     SymbolGenerator symgen;
    36     std::map<std::string, std::string> m_name_map;
     36    pablo::CodeGenState &               mBaseCG;
     37    std::map<std::string, std::string>  m_name_map;
    3738};
    3839
  • icGREP/icgrep-devel/icgrep/re/re_re.h

    r4206 r4210  
    1111#include <assert.h>
    1212#include <llvm/Support/Casting.h>
     13//#include <boost/pool/pool_alloc.hpp>
    1314
    1415using namespace llvm;
     
    5354    typedef std::initializer_list<RE *> InitializerList;
    5455    virtual ~RE() = 0;
     56
     57//    static inline void InitalizeMemory() {
     58//    }
     59//    static inline void ReleaseMemory() {
     60//        Allocator.
     61//    }
    5562protected:
    5663    inline RE(const ClassTypeId id)
     
    5966    }
    6067    const ClassTypeId mClassTypeId;
     68//    static boost::fast_pool_allocator<RE> Allocator;
    6169};
    6270
     
    6472public:
    6573    virtual ~Vector() {
    66         for (RE * re : *this) {
    67             delete re;
    68         }
    6974    }
    7075protected:
     
    99104//    }
    100105//    virtual ~Pair() {
    101 //        delete _lh;
    102 //        delete _rh;
    103106//    }
    104107//protected:
Note: See TracChangeset for help on using the changeset viewer.