Changeset 3914


Ignore:
Timestamp:
Jul 21, 2014, 11:49:25 AM (5 years ago)
Author:
cameron
Message:

Updates for icgrep-0.9: re simplifications, re names, replimit mods, debugged while loops

Location:
icGREP/icgrep-devel/icgrep
Files:
6 deleted
26 edited

Legend:

Unmodified
Added
Removed
  • icGREP/icgrep-devel/icgrep/CMakeLists.txt

    r3850 r3914  
    4040add_library(PabloADT pe_advance.cpp  pe_all.cpp  pe_and.cpp  pe_charclass.cpp  pe_matchstar.cpp  pe_not.cpp  pe_or.cpp  pe_pabloe.cpp  pe_sel.cpp  pe_var.cpp  pe_xor.cpp ps_assign.cpp  ps_if.cpp  ps_pablos.cpp  ps_while.cpp printer_pablos.cpp)
    4141
    42 add_library(RegExpADT re_alt.cpp  re_cc.cpp  re_end.cpp  re_parser.cpp  re_re.cpp  re_rep.cpp  re_seq.cpp  re_start.cpp  rl_replimit.cpp  rl_unbounded.cpp  rl_upperbound.cpp parsefailure.cpp  parseresult.cpp  parsesuccess.cpp printer_re.cpp)
     42add_library(RegExpADT re_alt.cpp  re_cc.cpp  re_end.cpp  re_name.cpp re_parser.cpp  re_re.cpp  re_rep.cpp  re_seq.cpp  re_start.cpp parsefailure.cpp  parseresult.cpp  parsesuccess.cpp printer_re.cpp)
    4343
    4444
     
    5050 
    5151# add the executable
    52 add_executable(icgrep icgrep.cpp llvm_gen.cpp  llvm_gen_helper.cpp utf_encoding.cpp cc_codegenobject.cpp  cc_compiler.cpp  cc_compiler_helper.cpp re_compiler.cpp pbix_compiler.cpp  symbol_generator.cpp utf8_encoder.cpp)
     52add_executable(icgrep icgrep.cpp llvm_gen.cpp  llvm_gen_helper.cpp utf_encoding.cpp cc_codegenobject.cpp  cc_compiler.cpp  cc_compiler_helper.cpp re_simplifier.cpp re_reducer.cpp re_nullable.cpp re_compiler.cpp pbix_compiler.cpp  symbol_generator.cpp utf8_encoder.cpp)
    5353
    5454target_link_libraries (icgrep PabloADT RegExpADT ${REQ_LLVM_LIBRARIES})
  • icGREP/icgrep-devel/icgrep/cc_compiler.cpp

    r3850 r3914  
    1212}
    1313
    14 std::list<PabloS*> CC_Compiler::compile(std::string basis_pattern, std::string gensym_pattern, RE* re, std::list<CC*> predefined)
     14std::list<PabloS*> CC_Compiler::compile(std::string basis_pattern,
     15                                        std::string gensym_pattern,
     16                                        const std::map<std::string, RE*>& re_map,
     17                                        std::list<CC*> predefined)
    1518{
    1619    mEncoding.setBasisPattern(basis_pattern);
     
    2730    }
    2831
    29     process_re(cgo, re);
     32    process_re_map(cgo, re_map);
    3033    process_predefined(cgo, predefined);
    3134
     
    3336}
    3437
    35 void CC_Compiler::process_re(CC_CodeGenObject &cgo, RE *re)
    36 {
     38void CC_Compiler::process_re_map(CC_CodeGenObject &cgo,const std::map<std::string, RE*>& re_map)
     39{
     40    for (auto it =  re_map.rbegin(); it != re_map.rend(); ++it)
     41    {
     42        process_re(cgo, it->second);
     43    }
     44}
     45
     46void CC_Compiler::process_re(CC_CodeGenObject &cgo, RE* re)
     47{
     48
    3749    if (Alt* re_alt = dynamic_cast<Alt*>(re))
    3850    {
  • icGREP/icgrep-devel/icgrep/cc_compiler.h

    r3850 r3914  
    1919#include <string>
    2020#include <list>
     21#include <map>
     22#include <algorithm>
    2123
    2224#include <cassert>
    2325#include <stdlib.h>
    24 
    25 //***********************************
    26 //TODO: Just for development
    27 //#include "printer_pablos.h"
    28 //***********************************
    2926
    3027#define INT2STRING(i) static_cast<std::ostringstream*>(&(std::ostringstream() << i))->str()
     
    3431public:
    3532    CC_Compiler(UTF_Encoding encoding);
    36     std::list<PabloS*> compile(std::string basis_pattern, std::string gensym_pattern, RE* re, std::list<CC*> predefined);
     33    std::list<PabloS*> compile(std::string basis_pattern,
     34                               std::string gensym_pattern,
     35                               const std::map<std::string, RE*>& re_map,
     36                               std::list<CC*> predefined);
    3737private:
     38    void process_re_map(CC_CodeGenObject& cgo, const std::map<std::string, RE*>& re_map);
    3839    void process_re(CC_CodeGenObject& cgo, RE* re);
    3940    void process_predefined(CC_CodeGenObject& cgo, std::list<CC*> predefined);
  • icGREP/icgrep-devel/icgrep/icgrep.cpp

    r3850 r3914  
    7575int main(int argc, char *argv[])
    7676{
    77     double timer;
    78 
    7977    char * inregex, * fileregex, * infilename, * outfilename;
    8078    FILE *infile, *outfile, *regexfile;
     
    8482    int print_version_option = 0;
    8583    int regex_from_file_option = 0;
     84    int ascii_only_option = 0;
    8685
    8786    int compile_time_option = 0;
    8887
     88    unsigned long long cycles = 0;
     89    double timer = 0;
     90
     91    long lSize = 0;
     92
    8993    size_t result;
    9094
    91     long lSize;
    92 
    93     while ((opt_code = getopt(argc, argv, "cvft")) != -1)
     95    while ((opt_code = getopt(argc, argv, "cvfta")) != -1)
    9496    {
    9597        switch (opt_code)
     
    107109            compile_time_option = 1;
    108110            break;
     111        case 'a':
     112            ascii_only_option = 1;
     113            break;
    109114        case '?':
    110115            break;
    111116        default:
    112117            printf ("Invalid option: %c\n", opt_code);
    113             printf("Usage: %s [-c] [-v] [-f] <regex|regexfile> <inputfile> [<outputfile>]\n", argv[0]);
     118            printf("Usage: %s [-c] [-v] [-f] [-t] [-a] <regex|regexfile> <inputfile> [<outputfile>]\n", argv[0]);
    114119                    exit(-1);
    115120        }
     
    119124    {
    120125        printf ("Too few arguments\n");
    121         printf("Usage: %s [-c] [-v] [-f] <regex|regexfile> <inputfile> [<outputfile>]\n", argv[0]);
     126        printf("Usage: %s [-c] [-v] [-f] [-t] [-a] <regex|regexfile> <inputfile> [<outputfile>]\n", argv[0]);
    122127        exit(-1);
    123128    }
     
    166171        {
    167172            printf ("Too many arguments\n");
    168             printf("Usage: %s [-c] [-v] [-f] <regex|regexfile> <inputfile> [<outputfile>]\n", argv[0]);
     173            printf("Usage: %s [-c] [-v] [-f] [-t] [-a] <regex|regexfile> <inputfile> [<outputfile>]\n", argv[0]);
    169174            exit(-1);
    170175        }
     
    188193
    189194    RE_Compiler* re_compiler = new RE_Compiler();
    190     if (compile_time_option) timer = getElapsedTime();
    191     LLVM_Gen_RetVal llvm_codegen = re_compiler->compile(compile_time_option, "basis_bits.bit_", "temp", encoding ,(regex_from_file_option ? fileregex : inregex));
    192 
    193195    if (compile_time_option)
    194196    {
     197        cycles = get_hrcycles();
     198        timer = getElapsedTime();
     199    }
     200    LLVM_Gen_RetVal llvm_codegen = re_compiler->compile(compile_time_option,
     201                                                        ascii_only_option,
     202                                                        "basis_bits.bit_",
     203                                                        "temp",
     204                                                        encoding ,
     205                                                        (regex_from_file_option ? fileregex : inregex));
     206
     207    if (compile_time_option)
     208    {
     209        cycles = get_hrcycles() - cycles;
    195210        timer = getElapsedTime() - timer;
    196         std::cout << "Total Compile Time: " << timer <<  " seconds" << std::endl;
     211        std::cout << "Total compile time - cycles:       " << cycles << std::endl;
     212        std::cout << "Total compile time - milliseconds: " << timer << std::endl;
    197213    }
    198214
     
    277293
    278294        int copy_back_pos = 0;
     295
    279296
    280297        if (LF_scanner.count() > 0) {
  • icGREP/icgrep-devel/icgrep/llvm_gen.cpp

    r3854 r3914  
    66
    77#include "llvm_gen.h"
     8#include "printer_pablos.h"
    89
    910extern "C" {
     
    1819}
    1920
    20 LLVM_Generator::LLVM_Generator(std::string basis_pattern, int bits)
     21LLVM_Generator::LLVM_Generator(std::string basis_pattern, std::string lf_ccname, int bits)
    2122{
    2223    mBasis_Pattern = basis_pattern;
     24    m_lf_ccname = lf_ccname;
    2325    mBits = bits;
    2426    mInWhile = false;
     
    3840    InitializeNativeTarget();
    3941    std::string ErrStr;
    40     mExecutionEngine = EngineBuilder(mMod).setUseMCJIT(true).setErrorStr(&ErrStr).setOptLevel(CodeGenOpt::Default).create();
     42
     43    mExecutionEngine = EngineBuilder(mMod).setUseMCJIT(true).setErrorStr(&ErrStr).setOptLevel(CodeGenOpt::Level::Less).create();
    4144    if (!mExecutionEngine)
    4245    {
     
    6164    //Create the carry queue.
    6265    mCarryQueueIdx = 0;
     66    mCarryQueueSize = LLVM_Generator_Helper::CarryCount_PabloStatements(cg_state.stmtsl);
    6367
    6468    mBasicBlock = BasicBlock::Create(mMod->getContext(), "parabix_entry", mFunc_process_block,0);
     
    8185    Generate_PabloStatements(cg_state.stmtsl);
    8286    SetReturnMarker(cg_state.newsym, 0);
    83     SetReturnMarker("lex.cclf", 1);
     87    SetReturnMarker(m_lf_ccname, 1);
    8488
    8589    //Terminate the block
     
    98102    fpm.add(new DataLayout(*mExecutionEngine->getDataLayout()));
    99103
    100     fpm.add(createPromoteMemoryToRegisterPass());
     104    fpm.add(createPromoteMemoryToRegisterPass()); //Transform to SSA form.
     105
     106    fpm.add(createBasicAliasAnalysisPass());      //Provide basic AliasAnalysis support for GVN. (Global Value Numbering)
     107    fpm.add(createInstructionCombiningPass());    //Simple peephole optimizations and bit-twiddling.
     108    fpm.add(createCFGSimplificationPass());       //Simplify the control flow graph (deleting unreachable blocks, etc).
     109    fpm.add(createReassociatePass());             //Reassociate expressions.
     110    fpm.add(createGVNPass());                     //Eliminate common subexpressions.
    101111
    102112    fpm.doInitialization();
     
    110120    LLVM_Gen_RetVal retVal;
    111121    //Return the required size of the carry queue and a pointer to the process_block function.
    112     retVal.carry_q_size = LLVM_Generator_Helper::CarryCount_PabloStatements(cg_state.stmtsl);;
     122    retVal.carry_q_size = mCarryQueueSize;
    113123    retVal.process_block_fptr = mExecutionEngine->getPointerToFunction(mFunc_process_block);
    114124
     
    293303    else if (While* whl = dynamic_cast<While*>(stmt))
    294304    {
    295         IRBuilder<> b(mBasicBlock);
    296 
    297         mWhileCondBlock = BasicBlock::Create(mMod->getContext(), "while.cond", mFunc_process_block, 0);
    298         mWhileBodyBlock = BasicBlock::Create(mMod->getContext(), "while.body",mFunc_process_block, 0);
    299         mWhileEndBlock = BasicBlock::Create(mMod->getContext(), "while.end",mFunc_process_block, 0);
    300 
    301305        int idx = mCarryQueueIdx;
    302306
     307        //With this call to the while body we will account for all of the carry in values.
    303308        std::string returnMarker = Generate_PabloStatements(whl->getPSList());
    304309
    305         b.CreateBr(mWhileCondBlock);
    306         mBasicBlock = mWhileCondBlock;
    307         IRBuilder<> b_cond(mWhileCondBlock);
     310        BasicBlock*  whileCondBlock = BasicBlock::Create(mMod->getContext(), "while.cond", mFunc_process_block, 0);
     311        BasicBlock*  whileBodyBlock = BasicBlock::Create(mMod->getContext(), "while.body",mFunc_process_block, 0);
     312        BasicBlock*  whileEndBlock = BasicBlock::Create(mMod->getContext(), "while.end",mFunc_process_block, 0);
     313
     314        IRBuilder<> b(mBasicBlock);
     315        b.CreateBr(whileCondBlock);
     316        mBasicBlock = whileCondBlock;
     317        IRBuilder<> b_cond(whileCondBlock);
    308318
    309319        Value* expression_marker_value = Generate_PabloE(whl->getExpr());
    310        
    311320        // Use an i128 compare for simplicity and speed.
    312321        Value* cast_marker_value_1 = b_cond.CreateBitCast(expression_marker_value, IntegerType::get(mMod->getContext(), 128));
    313322        Value* int_tobool1 = b_cond.CreateICmpEQ(cast_marker_value_1, ConstantInt::get(IntegerType::get(mMod->getContext(), 128), 0));
    314         b_cond.CreateCondBr(int_tobool1, mWhileEndBlock, mWhileBodyBlock);
    315 
    316         //Note: Everything that happens during the recursive calls for the pablo statements in the body of this while loop will
    317         //happen within the basic block of the body of the while loop.  This strategy will not support kstars within
    318         //kstars, a more complex stragegy for basicblocks will have to be devised for that.
    319         mBasicBlock = mWhileBodyBlock;
    320 
    321         mInWhile = true;
    322         mCarryQueueIdx = idx;
     323        b_cond.CreateCondBr(int_tobool1, whileEndBlock, whileBodyBlock);
     324
     325        mBasicBlock = whileBodyBlock;
     326        mCarryQueueIdx = 0;
     327        //Store the current carry queue.
     328        Value* ptr_last_carry_q = mptr_carry_q;
     329
     330        IRBuilder<> b_wb1(mBasicBlock);
     331        //Create and initialize a new carry queue.
     332        Value* ptr_while_carry_q = b_wb1.CreateAlloca(m64x2Vect, b_wb1.getInt64(mCarryQueueSize - idx));
     333        for (int i=0; i<(mCarryQueueSize-idx); i++)
     334        {
     335            Value* carryq_idx1 = b_wb1.getInt64(i);
     336            Value* carryq_GEP1 = b_wb1.CreateGEP(ptr_while_carry_q, carryq_idx1);
     337            Value* void_1 = b_wb1.CreateStore(mConst_Aggregate_64x2_0, carryq_GEP1);
     338        }
     339
     340        //Point mptr_carry_q to the new local carry queue.
     341        mptr_carry_q = ptr_while_carry_q;
     342
    323343        returnMarker = Generate_PabloStatements(whl->getPSList());
    324         mInWhile = false;
    325         IRBuilder<> b_wb(mWhileBodyBlock);
    326         b_wb.CreateBr(mWhileCondBlock);
    327 
    328         mBasicBlock = mWhileEndBlock;
     344
     345        IRBuilder<> b_wb2(mBasicBlock);
     346        //Copy back to the last carry queue the carries from the execution of the while statement list.
     347        for (int c=0; c<(mCarryQueueSize-idx); c++)
     348        {
     349            Value* carryq_idx = b_wb2.getInt64(c);
     350            Value* carryq_GEP = b_wb2.CreateGEP(mptr_carry_q, carryq_idx);
     351            Value* carryq_value = b_wb2.CreateLoad(carryq_GEP);
     352
     353            Value* last_carryq_idx = b_wb2.getInt64(idx + c);
     354            Value* last_carryq_GEP = b_wb2.CreateGEP(ptr_last_carry_q, last_carryq_idx);
     355            Value* last_carryq_value = b_wb2.CreateLoad(last_carryq_GEP);
     356
     357            Value* new_carryq_value = b_wb2.CreateOr(carryq_value, last_carryq_value);
     358            Value* void_1 = b_wb2.CreateStore(new_carryq_value, last_carryq_GEP);
     359        }
     360
     361        b_wb2.CreateBr(whileCondBlock);
     362
     363        mBasicBlock = whileEndBlock;
     364        mptr_carry_q = ptr_last_carry_q;
     365        mCarryQueueIdx += idx;
    329366
    330367        retVal = returnMarker;
     
    419456
    420457        Value* packed_shuffle;
    421         if (mInWhile)
    422         {
    423             Constant* const_packed_1_elems [] = {b.getInt32(0), b.getInt32(2)};
    424             Constant* const_packed_1 = ConstantVector::get(const_packed_1_elems);
    425             packed_shuffle = b.CreateShuffleVector(mConst_Aggregate_64x2_0, srli_1_value, const_packed_1, "packed_shuffle iw");
    426         }
    427         else
    428         {
    429             Constant* const_packed_1_elems [] = {b.getInt32(0), b.getInt32(2)};
    430             Constant* const_packed_1 = ConstantVector::get(const_packed_1_elems);
    431             packed_shuffle = b.CreateShuffleVector(carryq_value, srli_1_value, const_packed_1, "packed_shuffle nw");
    432         }
     458        Constant* const_packed_1_elems [] = {b.getInt32(0), b.getInt32(2)};
     459        Constant* const_packed_1 = ConstantVector::get(const_packed_1_elems);
     460        packed_shuffle = b.CreateShuffleVector(carryq_value, srli_1_value, const_packed_1, "packed_shuffle nw");
    433461
    434462        Constant* const_packed_2_elems[] = {b.getInt64(1), b.getInt64(1)};
     
    442470        Value* srli_2_value = b.CreateLShr(cast_marker_value_1, 127);
    443471        Value* carryout_2_carry = b.CreateBitCast(srli_2_value, m64x2Vect);
    444 
    445         if (mInWhile)
    446         {
    447             Value* carryout = b.CreateOr(carryq_value, carryout_2_carry);
    448             Value* void_1 = b.CreateStore(carryout, carryq_GEP);
    449         }
    450         else
    451         {
    452             Value* void_1 = b.CreateStore(carryout_2_carry, carryq_GEP);
    453         }
     472        Value* void_1 = b.CreateStore(carryout_2_carry, carryq_GEP);
    454473
    455474        //Increment the idx for the next advance or scan through.
  • icGREP/icgrep-devel/icgrep/llvm_gen.h

    r3850 r3914  
    1111#include "re_re.h"
    1212#include "re_cc.h"
     13#include "re_name.h"
    1314#include "re_start.h"
    1415#include "re_end.h"
     
    1617#include "re_alt.h"
    1718#include "re_rep.h"
    18 
    19 #include "rl_replimit.h"
    20 #include "rl_unbounded.h"
    21 #include "rl_upperbound.h"
    2219
    2320//Pablo Expressions
     
    10198{
    10299public:
    103     LLVM_Generator(std::string basis_pattern, int bits);
     100    LLVM_Generator(std::string basis_pattern, std::string lf_ccname, int bits);
    104101    ~LLVM_Generator();
    105     LLVM_Gen_RetVal Generate_LLVMIR(CodeGenState cg_state, std::list<PabloS*> cc_cgo);
     102    LLVM_Gen_RetVal Generate_LLVMIR(CodeGenState cg_state,
     103                                    std::list<PabloS*> cc_cgo);
    106104    void Print_Register(char* name, BitBlock bit_block);
    107105private:
     
    117115
    118116    int         mBits;
     117    std::string m_lf_ccname;
    119118    std::string mBasis_Pattern;
    120119
    121120    Module*          mMod;
    122121    BasicBlock*      mBasicBlock;
    123     BasicBlock*      mWhileCondBlock;
    124     BasicBlock*      mWhileBodyBlock;
    125     BasicBlock*      mWhileEndBlock;
     122
    126123    ExecutionEngine* mExecutionEngine;
    127124
     
    138135    int         mCarryQueueIdx;
    139136    Value*      mptr_carry_q;
     137
     138    int         mCarryQueueSize;
    140139
    141140    ConstantInt*           mConst_int64_neg1;
  • icGREP/icgrep-devel/icgrep/pbix_compiler.cpp

    r3850 r3914  
    77#include "pbix_compiler.h"
    88
    9 Pbix_Compiler::Pbix_Compiler(){
    10   symgen = SymbolGenerator();
     9Pbix_Compiler::Pbix_Compiler(std::string lf_ccname)
     10{
     11    m_lf_ccname = lf_ccname;
     12    symgen = SymbolGenerator();
    1113}
    1214
    1315CodeGenState Pbix_Compiler::compile(RE *re)
    14 {
     16{   
    1517    std::string gs_retVal;
    1618    gs_retVal = symgen.gensym("start_marker");
     
    2426    //These three lines are specifically for grep.
    2527    gs_retVal = symgen.gensym("marker");
    26     cg_state.stmtsl.push_back(new Assign(gs_retVal, new And(new MatchStar(new Var(cg_state.newsym), new Not(new Var("lex.cclf"))), new Var("lex.cclf"))));
     28    cg_state.stmtsl.push_back(new Assign(gs_retVal, new And(new MatchStar(new Var(cg_state.newsym), new Not(new Var(m_lf_ccname))), new Var(m_lf_ccname))));
    2729    cg_state.newsym = gs_retVal;
    2830
     
    3234CodeGenState Pbix_Compiler::re2pablo_helper(RE *re, CodeGenState cg_state)
    3335{
    34     if (CC* cc = dynamic_cast<CC*>(re))
     36    if (Name* name = dynamic_cast<Name*>(re))
    3537    {
    3638        std::string gs_retVal = symgen.gensym("marker");
    37         cg_state.stmtsl.push_back(new Assign(gs_retVal, new Advance(new And(new Var(cg_state.newsym), new CharClass(cc->getName())))));
     39        cg_state.stmtsl.push_back(new Assign(gs_retVal, new Advance(new And(new Var(cg_state.newsym), new CharClass(name->getName())))));
    3840        cg_state.newsym = gs_retVal;
    3941
     
    4345    {
    4446        std::string gs_retVal = symgen.gensym("start_of_line_marker");
    45         cg_state.stmtsl.push_back(new Assign(gs_retVal, new And(new Var(cg_state.newsym), new Not(new Advance(new Not(new CharClass("lex.cclf")))))));
     47        cg_state.stmtsl.push_back(new Assign(gs_retVal, new And(new Var(cg_state.newsym), new Not(new Advance(new Not(new CharClass(m_lf_ccname)))))));
    4648        cg_state.newsym = gs_retVal;
    4749    }
     
    4951    {
    5052        std::string gs_retVal = symgen.gensym("end_of_line_marker");
    51         cg_state.stmtsl.push_back(new Assign(gs_retVal, new And(new Var(cg_state.newsym), new CharClass("lex.cclf"))));
     53        cg_state.stmtsl.push_back(new Assign(gs_retVal, new And(new Var(cg_state.newsym), new CharClass(m_lf_ccname))));
    5254        cg_state.newsym = gs_retVal;
    5355    }
     
    8688    else if (Rep* rep = dynamic_cast<Rep*>(re))
    8789    {
    88         if ((dynamic_cast<CC*>(rep->getRE()) != 0) && (rep->getLB() == 0) && (dynamic_cast<Unbounded*>(rep->getUB())!= 0))
     90        if ((dynamic_cast<Name*>(rep->getRE()) != 0) && (rep->getLB() == 0) && (rep->getUB()== unboundedRep))
    8991        {
    9092            //std::cout << "Matchstar!" << std::endl;
    91             CC* rep_cc = dynamic_cast<CC*>(rep->getRE());
     93
     94            Name* rep_name = dynamic_cast<Name*>(rep->getRE());
    9295            std::string gs_retVal = symgen.gensym("marker");
    93             cg_state.stmtsl.push_back(new Assign(gs_retVal, new MatchStar(new Var(cg_state.newsym), new CharClass(rep_cc->getName()))));
     96            cg_state.stmtsl.push_back(new Assign(gs_retVal, new MatchStar(new Var(cg_state.newsym), new CharClass(rep_name->getName()))));
    9497            cg_state.newsym = gs_retVal;
    9598        }
    96         else if (dynamic_cast<Unbounded*>(rep->getUB()) != 0)
     99        else if (rep->getUB() == unboundedRep)
    97100        {
    98101            if (rep->getLB() == 0)
    99102            {
    100                 //std::cout << "While, no lb." << std::endl; //THIS IS THE ONE THAT ISN'T WORKING.
     103                //std::cout << "While, no lb." << std::endl;
    101104
    102105                std::string while_test_gs_retVal = symgen.gensym("while_test");
     
    121124            }
    122125        }
    123         else if (dynamic_cast<UpperBound*>(rep->getUB()) != 0)
     126        else if (rep->getUB() != unboundedRep)
    124127        {
    125             UpperBound* ub = dynamic_cast<UpperBound*>(rep->getUB());
    126             if ((rep->getLB() == 0) && (ub->getUB() == 0))
     128            if ((rep->getLB() == 0) && (rep->getUB() == 0))
    127129            {
    128130                //Just fall through...do nothing.
    129131            }
    130             else if ((rep->getLB() == 0) && (ub->getUB() > 0))
     132            else if ((rep->getLB() == 0) && (rep->getUB() > 0))
    131133            {
    132134                CodeGenState t1_cg_state = re2pablo_helper(rep->getRE(), cg_state);
    133                 ub->setUB(ub->getUB() - 1);
     135                rep->setUB(rep->getUB() - 1);
    134136                CodeGenState t2_cg_state = re2pablo_helper(re, t1_cg_state);
    135137                std::string gs_retVal = symgen.gensym("alt_marker");
     
    138140                cg_state.newsym = gs_retVal;
    139141            }
    140             else //if ((rep->getLB() > 0) && (ub->getUB() > 0))
     142            else //if ((rep->getLB() > 0) && (rep->getUB() > 0))
    141143            {
    142144                CodeGenState t1_cg_state = re2pablo_helper(rep->getRE(), cg_state);
    143145                rep->setLB(rep->getLB() - 1);
    144                 ub->setUB(ub->getUB() - 1);
     146                rep->setUB(rep->getUB() - 1);
    145147                cg_state = re2pablo_helper(rep, t1_cg_state);
    146148            }
  • icGREP/icgrep-devel/icgrep/pbix_compiler.h

    r3850 r3914  
    1010//Regular Expressions
    1111#include "re_re.h"
    12 #include "re_cc.h"
     12#include "re_name.h"
    1313#include "re_start.h"
    1414#include "re_end.h"
     
    1616#include "re_alt.h"
    1717#include "re_rep.h"
    18 
    19 #include "rl_replimit.h"
    20 #include "rl_unbounded.h"
    21 #include "rl_upperbound.h"
    2218
    2319//Pablo Expressions
     
    4945#include <vector>
    5046
    51 //***********************************
    52 //TODO: Just for development
    53 //#include "printer_pablos.h"
    54 //***********************************
    5547
    5648struct CodeGenState{
     
    6254{
    6355public:
    64     Pbix_Compiler();
     56    Pbix_Compiler(std::string lf_ccname);
    6557    CodeGenState compile(RE *re);
    6658private:
     
    7062
    7163    SymbolGenerator symgen;
     64    std::string m_lf_ccname;
    7265};
    7366
  • icGREP/icgrep-devel/icgrep/printer_pablos.cpp

    r3850 r3914  
    77#include "printer_pablos.h"
    88
    9 StatementPrinter::StatementPrinter(){}
    109
    1110std::string StatementPrinter::PrintStmts(CodeGenState cg_state)
     
    108107    else if (MatchStar* mstar = dynamic_cast<MatchStar*>(expr))
    109108    {
    110         retVal = "MarchStar (" + ShowPabloE(mstar->getExpr1()) + ", " + ShowPabloE(mstar->getExpr2()) + ")";
     109        retVal = "MatchStar (" + ShowPabloE(mstar->getExpr1()) + ", " + ShowPabloE(mstar->getExpr2()) + ")";
    111110    }
    112111
  • icGREP/icgrep-devel/icgrep/printer_pablos.h

    r3850 r3914  
    5353    static std::string ShowPabloE(PabloE* expr);
    5454    static std::string ShowPabloS(PabloS* stmt);
    55 private:
    56     StatementPrinter();
    5755};
    5856
  • icGREP/icgrep-devel/icgrep/printer_re.cpp

    r3850 r3914  
    77#include "printer_re.h"
    88
    9 
    10 Printer_RE::Printer_RE(){}
    119
    1210std::string Printer_RE::PrintRE(RE* re)
     
    4543        //retVal += " is codepoint 47 a member: " + member;
    4644
    47         /*
     45/*
    4846        retVal += "CC \"";
    4947        retVal += re_cc->getName();
     
    6361            retVal += "]";
    6462        }
    65         */
     63*/
     64    }
     65    else if (Name* re_name = dynamic_cast<Name*>(re))
     66    {
     67        retVal += "Name \"";
     68        retVal += re_name->getName();
     69        retVal += "\" ";
    6670    }
    6771    else if (End* re_end = dynamic_cast<End*>(re))
     
    7276    {
    7377        retVal += "Rep("  + PrintRE(re_rep->getRE()) + "," + INT2STRING(re_rep->getLB()) + ",";
    74 
    75         if (Unbounded* unbounded = dynamic_cast<Unbounded*>(re_rep->getUB()))
    76         {
    77             retVal += "Unbounded)";
    78         }
    79         else if (UpperBound* upperbound = dynamic_cast<UpperBound*>(re_rep->getUB()))
    80         {
    81             retVal += "UpperBound " + INT2STRING(upperbound->getUB()) + ")";
    82         }
     78        retVal += (re_rep->getUB() == unboundedRep ? "Unbounded" : "UpperBound(" + INT2STRING(re_rep->getUB()) + ")");
    8379    }
    8480    else if (Seq* re_seq = dynamic_cast<Seq*>(re))
  • icGREP/icgrep-devel/icgrep/printer_re.h

    r3850 r3914  
    1212#include "re_alt.h"
    1313#include "re_cc.h"
     14#include "re_name.h"
    1415#include "re_end.h"
    1516#include "re_rep.h"
    1617#include "re_seq.h"
    1718#include "re_start.h"
    18 
    19 #include "rl_replimit.h"
    20 #include "rl_unbounded.h"
    21 #include "rl_upperbound.h"
    2219
    2320#include <iostream>
     
    3229public:
    3330    static std::string PrintRE(RE* re);
    34 private:
    35     Printer_RE();
    3631};
    3732
  • icGREP/icgrep-devel/icgrep/re_alt.cpp

    r3850 r3914  
    1818    it=lst->begin();
    1919    mList->assign(it, lst->end());
    20     mList->reverse();
     20    std::reverse(mList->begin(), mList->end());
     21}
     22
     23Alt::Alt(std::list<RE*> lst)
     24{
     25    mList = new std::list<RE*>();
     26    std::list<RE*>::iterator it;
     27    it=lst.begin();
     28    mList->assign(it, lst.end());
     29    std::reverse(mList->begin(), mList->end());
    2130}
    2231
  • icGREP/icgrep-devel/icgrep/re_alt.h

    r3850 r3914  
    99
    1010#include "re_re.h"
     11#include <algorithm>
    1112#include <list>
     13
    1214
    1315class Alt : public RE
     
    1618    Alt();
    1719    Alt(std::list<RE*>* lst);
     20    Alt(std::list<RE*> lst);
    1821    ~Alt();
    1922    std::list<RE*>* GetREList();
  • icGREP/icgrep-devel/icgrep/re_cc.cpp

    r3850 r3914  
    3838}
    3939
     40CC::CC(CC *cc1, CC *cc2)
     41{
     42    gensym_name();
     43    mSparceCharSet = cc2->getItems();
     44    joinCharSets(cc1->getItems());
     45}
     46
    4047CC::~CC(){}
    4148
     
    4754std::string CC::getName()
    4855{
    49     return mName;
     56    std::string name = "CC";
     57
     58    std::vector<CharSetItem>::iterator it;
     59    for (it = mSparceCharSet.begin(); it != mSparceCharSet.end(); ++it)
     60    {
     61        name += INT2STRING(it->lo_codepoint);
     62        name += INT2STRING(it->hi_codepoint);
     63    }
     64
     65    return name;
     66}
     67
     68std::string CC::getId()
     69{
     70    return mId;
    5071}
    5172
     
    7899            return true;
    79100        }
     101    }
     102}
     103
     104void CC::joinCharSets(std::vector<CharSetItem> items1)
     105{
     106    joinCharSets_helper(items1, items1.size() - 1);
     107}
     108
     109void CC::joinCharSets_helper(std::vector<CharSetItem> items1, int idx)
     110{
     111    if (idx > -1)
     112    {
     113        CharSetItem item = items1.at(idx);
     114        insert_range(item.lo_codepoint, item.hi_codepoint);
     115        idx--;
     116        joinCharSets_helper(items1, idx);
    80117    }
    81118}
     
    255292void CC::gensym_name()
    256293{
    257     mName = "lex.CC" + INT2STRING(msCSIidx);
     294    mId = "lex.CC" + INT2STRING(msCSIidx);
    258295    msCSIidx++;
    259296}
  • icGREP/icgrep-devel/icgrep/re_cc.h

    r3850 r3914  
    3131    CC(std::string name, int codepoint);
    3232    CC(std::string name, int lo_codepoint, int hi_codepoint);
     33    CC(CC* cc1, CC* cc2);
    3334    ~CC();
    3435    std::vector<CharSetItem> getItems();
    3536    std::string getName();
     37    std::string getId();
    3638    bool is_member(int codepoint);
    3739    void insert1(int codepoint);
     
    4850    void gensym_name();
    4951    bool is_member_helper(int codepoint, int idx);
     52    void joinCharSets(std::vector<CharSetItem> items1);
     53    void joinCharSets_helper(std::vector<CharSetItem> items1, int idx);
    5054    void insert_range_helper(int lo_codepoint, int hi_codepoint, int idx);
    5155    void negate_class_helper(int idx, int b);
     
    5458    std::vector<CharSetItem> mSparceCharSet;
    5559    std::string mName;
     60    std::string mId;
    5661};
    5762
  • icGREP/icgrep-devel/icgrep/re_compiler.cpp

    r3850 r3914  
    99RE_Compiler::RE_Compiler(){}
    1010
    11 LLVM_Gen_RetVal RE_Compiler::compile(bool show_compile_time, std::string basis_pattern, std::string gensym_pattern, UTF_Encoding encoding, std::string input_string)
     11LLVM_Gen_RetVal RE_Compiler::compile(bool show_compile_time,
     12                                     bool ascii_only,
     13                                     std::string basis_pattern,
     14                                     std::string gensym_pattern,
     15                                     UTF_Encoding encoding,
     16                                     std::string input_string)
    1217{
    13     RE_Parser parser;
    1418
    15     ParseResult* parse_result = parser.parse_re(input_string);
     19    ParseResult* parse_result = RE_Parser::parse_re(input_string);
    1620
    17     RE* parsed_re = 0;
     21    RE* re_ast = 0;
    1822    if (ParseSuccess* success = dynamic_cast<ParseSuccess*>(parse_result))
    1923    {
    20         parsed_re = success->getRE();
     24        re_ast = success->getRE();
    2125    }
    2226    else if (ParseFailure* failure = dynamic_cast<ParseFailure*>(parse_result))
     
    3236
    3337    //Print to the terminal the AST that was generated by the parser before adding the UTF encoding:
    34     //std::cout << "\n" + Printer_RE::PrintRE(parsed_re) + "\n" << std::endl;
     38    //std::cout << "\nParser:\n" + Printer_RE::PrintRE(re_ast) + "\n" << std::endl;
    3539
    3640    //Add the UTF encoding.
    37     RE* utf_encoded_re = 0;
    38     if (encoding.getName().compare("UTF-8") == 0)
     41    if (!ascii_only)
    3942    {
    40         UTF8_Encoder utf8_encoder;
    41         utf_encoded_re = utf8_encoder.toUTF8(parsed_re);
    42     }
    43     else
    44     {
    45         std::cout << "Invalid encoding!" << std::endl;
    46         exit(1);
     43        if (encoding.getName().compare("UTF-8") == 0)
     44        {
     45            re_ast = UTF8_Encoder::toUTF8(re_ast);
     46        }
     47        else
     48        {
     49            std::cout << "Invalid encoding!" << std::endl;
     50            exit(1);
     51        }
    4752    }
    4853
    4954    //Print to the terminal the AST that was generated by the utf8 encoder.
    50     //std::cout << "\n" + Printer_RE::PrintRE(utf_encoded_re) + "\n" << std::endl;
     55    //std::cout << "\nUTF8-encoder:\n" + Printer_RE::PrintRE(re_ast) + "\n" << std::endl;
     56
     57    //Optimization passes to simplify the AST.
     58    re_ast = RE_Simplifier::simplify(RE_Nullable::removeNullableSuffix(RE_Nullable::removeNullablePrefix(re_ast)));
     59
     60    //Print to the terminal the AST that was generated by the simplifier.
     61    //std::cout << "\nSimplifier:\n" + Printer_RE::PrintRE(re_ast) + "\n" << std::endl;
     62
     63    //Map all of the unique character classes in order to reduce redundancy.
     64    std::map<std::string, RE*> re_map;
     65    re_ast = RE_Reducer::reduce(re_ast, re_map);
     66
     67    //Print to the terminal the AST with the reduced REs.
     68    //std::cout << "\nReducer:\n" + Printer_RE::PrintRE(re_ast) + "\n" << std::endl;
    5169
    5270    //Build our list of predefined characters.
    5371    std::list<CC*> predefined_characters;
    5472    CC* cc_lf = new CC("lex.cclf", '\n');
    55     predefined_characters.push_back(cc_lf);
     73    std::string lf_ccname = cc_lf->getName();
     74    re_map.insert(make_pair(lf_ccname, cc_lf));
    5675
    5776    CC_Compiler cc_compiler(encoding);
    58     std::list<PabloS*> cc_stmtsl = cc_compiler.compile(basis_pattern, gensym_pattern, utf_encoded_re, predefined_characters);
     77    std::list<PabloS*> cc_stmtsl = cc_compiler.compile(basis_pattern, gensym_pattern, re_map, predefined_characters);
    5978
    6079    //Print to the terminal the AST that was generated by the character class compiler.
    6180    //std::cout << "\n" << "(" << StatementPrinter::Print_CC_PabloStmts(cc_stmtsl) << ")" << "\n" << std::endl;
    6281
    63     Pbix_Compiler pbix_compiler;
    64     CodeGenState cg_state = pbix_compiler.compile(utf_encoded_re);
     82    Pbix_Compiler pbix_compiler(lf_ccname);
     83    CodeGenState cg_state = pbix_compiler.compile(re_ast);
    6584
    6685    //Print to the terminal the AST that was generated by the parallel bit-stream compiler.
    6786    //std::cout << "\n" << "(" << StatementPrinter::PrintStmts(cg_state) << ")" << "\n" << std::endl;
    6887
    69     LLVM_Generator irgen(basis_pattern, encoding.getBits());
     88    //Print a count of the Pablo statements and expressions that are contained in the AST from the pbix compiler.
     89    //std::cout << "\nPablo Statement Count: " << Pbix_Counter::Count_PabloStatements(cg_state.stmtsl) <<  "\n" << std::endl;
    7090
     91    LLVM_Generator irgen(basis_pattern, lf_ccname, encoding.getBits());
     92
     93    unsigned long long cycles = 0;
    7194    double timer = 0;
    72     if (show_compile_time) timer = getElapsedTime();
     95    if (show_compile_time)
     96    {
     97        cycles = get_hrcycles();
     98        timer = getElapsedTime();
     99    }
     100
    73101    LLVM_Gen_RetVal retVal = irgen.Generate_LLVMIR(cg_state, cc_stmtsl);
    74102    if (show_compile_time)
    75103    {
     104        cycles = get_hrcycles() - cycles;
    76105        timer = getElapsedTime() - timer;
    77         std::cout << "LLVM Compile Time: " << timer <<  " seconds" << std::endl;
     106        std::cout << "LLVM compile time -  cycles:       " << cycles  << std::endl;
     107        std::cout << "LLVM compile time -  milliseconds: " << timer << std::endl;
    78108    }
    79109
    80     return retVal;
     110    return  retVal;  //irgen.Generate_LLVMIR(cg_state, cc_stmtsl);
    81111}
    82112
  • icGREP/icgrep-devel/icgrep/re_compiler.h

    r3850 r3914  
    1414#include "re_alt.h"
    1515#include "re_cc.h"
     16#include "re_name.h"
    1617#include "re_end.h"
    1718#include "re_rep.h"
    1819#include "re_seq.h"
    1920#include "re_start.h"
    20 
    21 #include "rl_replimit.h"
    22 #include "rl_unbounded.h"
    23 #include "rl_upperbound.h"
     21#include "re_nullable.h"
     22#include "re_simplifier.h"
     23#include "re_reducer.h"
    2424
    2525#include "printer_pablos.h"
     
    4141#include "llvm_gen.h"
    4242
     43//FOR TESTING AND ANALYSIS
     44//#include "pbix_counter.h"
     45
    4346#include <fstream>
    4447#include <iostream>
     
    5760public:
    5861    RE_Compiler();
    59     LLVM_Gen_RetVal compile(bool show_compile_time, std::string basis_pattern, std::string gensym_pattern, UTF_Encoding encoding ,std::string input_string);
    60 private:
    61 
     62    LLVM_Gen_RetVal compile(bool show_compile_time,
     63                            bool ascii_only,
     64                            std::string basis_pattern,
     65                            std::string gensym_pattern,
     66                            UTF_Encoding encoding ,
     67                            std::string input_string);
    6268};
    6369
  • icGREP/icgrep-devel/icgrep/re_parser.cpp

    r3850 r3914  
    77#include "re_parser.h"
    88
    9 RE_Parser::RE_Parser(){}
    109
    1110ParseResult* RE_Parser::parse_re(std::string input_string)
     
    6665    {
    6766        if (form_result.remaining.operator [](0) == '|')
    68         {
     67        {           
    6968            parse_re_list_retVal t1_re_list_retVal =
    7069                    parse_re_alt_form_list(form_result.remaining.substr(1, form_result.remaining.length() - 1));
    71 
    7270            std::list<RE*>::iterator it;
    7371            it=t1_re_list_retVal.re_list.begin();
    7472            re_list_retVal.re_list.assign(it, t1_re_list_retVal.re_list.end());
    75             re_list_retVal.re_list.push_back(re_success->getRE());
    7673            re_list_retVal.remaining = t1_re_list_retVal.remaining;
    7774        }
     
    205202     if (s.operator [](0) == '*')
    206203     {
    207          return extend_item(new Rep(re, 0, new Unbounded), s.substr(1, s.length() - 1));
     204         return extend_item(new Rep(re, 0, unboundedRep), s.substr(1, s.length() - 1));
    208205     }
    209206     else if (s.operator[](0) == '?')
    210207     {
    211          return extend_item(new Rep(re, 0, new UpperBound(1)), s.substr(1, s.length() - 1));
     208         return extend_item(new Rep(re, 0, 1), s.substr(1, s.length() - 1));
    212209     }
    213210     else if (s.operator[](0) == '+')
    214211     {
    215          return extend_item(new Rep(re, 1, new Unbounded), s.substr(1, s.length() - 1));
     212         return extend_item(new Rep(re, 1, unboundedRep), s.substr(1, s.length() - 1));
    216213     }
    217214     else if (s.operator[](0) == '{')
     
    222219        {
    223220            extend_item_retVal =
    224                     extend_item(new Rep(re, int_retVal.i, new UpperBound(int_retVal.i)), int_retVal.remaining.substr(1, int_retVal.remaining.length() - 1));
     221                    extend_item(new Rep(re, int_retVal.i, int_retVal.i), int_retVal.remaining.substr(1, int_retVal.remaining.length() - 1));
    225222
    226223        }
     
    228225        {
    229226            extend_item_retVal =
    230                     extend_item(new Rep(re, int_retVal.i, new Unbounded), int_retVal.remaining.substr(2, int_retVal.remaining.length() - 2));
     227                    extend_item(new Rep(re, int_retVal.i, unboundedRep), int_retVal.remaining.substr(2, int_retVal.remaining.length() - 2));
    231228
    232229        }
     
    238235            {
    239236                extend_item_retVal =
    240                         extend_item(new Rep(re, int_retVal.i, new UpperBound(t1_int_retVal.i)), t1_int_retVal.remaining.substr(1, t1_int_retVal.remaining.length() - 1));
     237                        extend_item(new Rep(re, int_retVal.i, t1_int_retVal.i), t1_int_retVal.remaining.substr(1, t1_int_retVal.remaining.length() - 1));
    241238            }
    242239            else
  • icGREP/icgrep-devel/icgrep/re_parser.h

    r3850 r3914  
    3838};
    3939
     40struct parse_re_vector_retVal{
     41    std::vector<RE*> re_vector;
     42    std::string remaining;
     43};
     44
    4045class RE_Parser
    4146{
    4247public:
    43     RE_Parser();
     48    //RE_Parser();
    4449    //The module exports the parse result.
    45     ParseResult* parse_re(std::string intput_string);
     50    static ParseResult* parse_re(std::string intput_string);
    4651private:
    47     parse_result_retVal parse_re_helper(std::string s);
    48     parse_re_list_retVal parse_re_alt_form_list(std::string s);
    49     parse_result_retVal parse_re_form(std::string s);
    50     parse_re_list_retVal parse_re_item_list(std::string s);
    51     parse_result_retVal parse_re_item(std::string s);
    52     parse_result_retVal parse_re_unit(std::string s);
    53     parse_result_retVal extend_item(RE* re, std::string s);
    54     parse_result_retVal parse_cc(std::string s);
    55     parse_result_retVal parse_cc_body(std::string s);
    56     parse_result_retVal parse_cc_body0(std::string s, CC* cc_sofar);
    57     parse_result_retVal parse_cc_body1(int chr, std::string s, CC* cc_sofar);
     52    static parse_result_retVal parse_re_helper(std::string s);
     53    static parse_re_list_retVal parse_re_alt_form_list(std::string s);
     54    static parse_result_retVal parse_re_form(std::string s);
     55    static parse_re_list_retVal parse_re_item_list(std::string s);
     56    static parse_result_retVal parse_re_item(std::string s);
     57    static parse_result_retVal parse_re_unit(std::string s);
     58    static parse_result_retVal extend_item(RE* re, std::string s);
     59    static parse_result_retVal parse_cc(std::string s);
     60    static parse_result_retVal parse_cc_body(std::string s);
     61    static parse_result_retVal parse_cc_body0(std::string s, CC* cc_sofar);
     62    static parse_result_retVal parse_cc_body1(int chr, std::string s, CC* cc_sofar);
    5863
    59     parse_int_retVal parse_hex(std::string s);
    60     parse_int_retVal parse_hex_body(int i, std::string s);
    61     int parse_hex_body1(int i, std::string hex_str);
     64    static parse_int_retVal parse_hex(std::string s);
     65    static parse_int_retVal parse_hex_body(int i, std::string s);
     66    static int parse_hex_body1(int i, std::string hex_str);
    6267
    63     parse_int_retVal parse_int(std::string s);
    64     parse_int_retVal parse_int1(int i, std::string s);
    65     parse_result_retVal negate_cc_result(parse_result_retVal cc_result);
     68    static parse_int_retVal parse_int(std::string s);
     69    static parse_int_retVal parse_int1(int i, std::string s);
     70    static parse_result_retVal negate_cc_result(parse_result_retVal cc_result);
    6671};
    6772
  • icGREP/icgrep-devel/icgrep/re_rep.cpp

    r3850 r3914  
    77#include "re_rep.h"
    88
    9 Rep::Rep(RE* re, int lb, RepLimit* ub)
     9Rep::Rep(RE* re, int lb, int ub)
    1010{
    1111    mRE = re;
     
    1717{
    1818    delete mRE;
    19     delete mUB;
    2019}
    2120
     
    3534}
    3635
    37 RepLimit* Rep::getUB()
     36int Rep::getUB()
    3837{
    3938    return mUB;
    4039}
    4140
     41void Rep::setUB(int ub)
     42{
     43    mUB = ub;
     44}
    4245
     46
  • icGREP/icgrep-devel/icgrep/re_rep.h

    r3850 r3914  
    99
    1010#include "re_re.h"
    11 #include "rl_replimit.h"
     11
     12const int unboundedRep = -1;
    1213
    1314class Rep : public RE
    1415{
    1516public:
    16     Rep(RE* re, int lb, RepLimit* ub);
     17    Rep(RE* re, int lb, int ub);
    1718    ~Rep();
    1819    RE* getRE();
    1920    int getLB();
    2021    void setLB(int lb);
    21     RepLimit* getUB();
     22    int getUB();
     23    void setUB(int ub);
    2224private:
    2325    RE* mRE;
    2426    int mLB;
    25     RepLimit* mUB;
     27    int mUB;
    2628};
    2729
  • icGREP/icgrep-devel/icgrep/re_seq.cpp

    r3850 r3914  
    2121}
    2222
     23Seq::Seq(std::list<RE*> lst)
     24{
     25    mList = new std::list<RE*>();
     26    std::list<RE*>::iterator it;
     27    it=lst.begin();
     28    mList->assign(it, lst.end());
     29    mList->reverse();
     30}
     31
    2332Seq::~Seq()
    2433{
  • icGREP/icgrep-devel/icgrep/re_seq.h

    r3850 r3914  
    1616    Seq();
    1717    Seq(std::list<RE*>* lst);
     18    Seq(std::list<RE*> lst);
    1819    ~Seq();
    1920    std::list<RE*>* GetREList();
  • icGREP/icgrep-devel/icgrep/utf8_encoder.cpp

    r3854 r3914  
    77#include "utf8_encoder.h"
    88
    9 UTF8_Encoder::UTF8_Encoder(){}
    10 
    11 RE* UTF8_Encoder::toUTF8(RE *re)
     9
     10RE* UTF8_Encoder::toUTF8(RE* re)
    1211{
    1312    RE* retVal = 0;
    1413
    1514    if (Alt* re_alt = dynamic_cast<Alt*>(re))
     15    {
     16        std::list<RE*> re_list;
     17        std::list<RE*>::reverse_iterator rit = re_alt->GetREList()->rbegin();
     18
     19        for (rit = re_alt->GetREList()->rbegin(); rit != re_alt->GetREList()->rend(); ++rit)
     20        {
     21            re_list.push_back(toUTF8(*rit));
     22        }
     23
     24        retVal = new Alt(&re_list);
     25    }
     26    else if (Seq* re_seq = dynamic_cast<Seq*>(re))
    1627    {
    1728        std::list<RE*> re_list;
    1829        std::list<RE*>::iterator it;
    1930
    20         for (it = re_alt->GetREList()->begin(); it != re_alt->GetREList()->end(); ++it)
     31        for (it = re_seq->GetREList()->begin(); it != re_seq->GetREList()->end(); ++it)
    2132        {
    2233            re_list.push_front(toUTF8(*it));
    2334        }
    2435
    25         retVal = new Alt(&re_list);
    26     }
    27     else if (Seq* re_seq = dynamic_cast<Seq*>(re))
    28     {
    29         std::list<RE*> re_list;
    30         std::list<RE*>::iterator it;
    31 
    32         for (it = re_seq->GetREList()->begin(); it != re_seq->GetREList()->end(); ++it)
    33         {
    34             re_list.push_front(toUTF8(*it));
    35         }
    36 
    3736        retVal = new Seq(&re_list);
    3837    }
    3938    else if (Rep* re_rep = dynamic_cast<Rep*>(re))
    4039    {
    41         RepLimit* replimit;
    42         if (UpperBound* unbounded = dynamic_cast<UpperBound*>(re_rep->getUB()))
    43         {
    44             replimit = new UpperBound(unbounded->getUB());
    45         }
    46         else
    47         {
    48             replimit = new Unbounded();
    49         }
    50 
    51         retVal = new Rep(toUTF8(re_rep->getRE()), re_rep->getLB(), replimit);
     40        retVal = new Rep(toUTF8(re_rep->getRE()), re_rep->getLB(), re_rep->getUB());
    5241    }
    5342    else if (CC* re_cc = dynamic_cast<CC*>(re))
     
    6453                re_list.push_back(rangeToUTF8(re_cc->getItems().at(i)));
    6554            }
    66             retVal = new Alt(&re_list);
     55            retVal = RE_Simplifier::mkAlt(&re_list);
     56            //retVal = new Alt(&re_list);
    6757        }
    6858    }
     
    191181    else
    192182    {
    193         retVal = 0x80 | ((codepoint >> (6 * (len - n))) & 0x3F);
     183        retVal = 0x80 | (codepoint >> (6 * (len - n))) & 0x3F;
    194184    }
    195185
  • icGREP/icgrep-devel/icgrep/utf8_encoder.h

    r3850 r3914  
    1717#include "re_rep.h"
    1818
    19 #include "rl_replimit.h"
    20 #include "rl_unbounded.h"
    21 #include "rl_upperbound.h"
     19#include "re_simplifier.h"
    2220
    2321class UTF8_Encoder
    2422{
    2523public:
    26     UTF8_Encoder();
    27     RE* toUTF8(RE* re);
     24    static RE* toUTF8(RE* re);
    2825private:
    29     RE* rangeToUTF8(CharSetItem item);
    30     RE* rangeToUTF8_helper(int lo, int hi, int n, int hlen);
    31     CC* makeByteClass(int byteval);
    32     CC* makeByteRange(int lo, int hi);
     26    static RE* rangeToUTF8(CharSetItem item);
     27    static RE* rangeToUTF8_helper(int lo, int hi, int n, int hlen);
     28    static CC* makeByteClass(int byteval);
     29    static CC* makeByteRange(int lo, int hi);
    3330
    34     int u8len(int cp);
    35     int max_of_u8len(int lgth);
    36     int u8byte(int codepoint, int n);
     31    static int u8len(int cp);
     32    static int max_of_u8len(int lgth);
     33    static int u8byte(int codepoint, int n);
    3734};
    3835
Note: See TracChangeset for help on using the changeset viewer.