Ignore:
Timestamp:
Sep 27, 2014, 11:12:13 PM (5 years ago)
Author:
nmedfort
Message:

Modified RE module to use an LLVM-like dyn_cast system; added 'make' functions to hide RE constructors.

Location:
icGREP/icgrep-devel/icgrep
Files:
41 edited

Legend:

Unmodified
Added
Removed
  • icGREP/icgrep-devel/icgrep/cc_compiler.cpp

    r4187 r4194  
    99#include "utf_encoding.h"
    1010#include "cc_compiler_helper.h"
    11 #include "pe_sel.h"
     11
     12//Pablo Expressions
    1213#include "pe_advance.h"
    1314#include "pe_all.h"
    1415#include "pe_and.h"
     16#include "pe_call.h"
    1517#include "pe_charclass.h"
    1618#include "pe_matchstar.h"
    1719#include "pe_not.h"
    1820#include "pe_or.h"
     21#include "pe_pabloe.h"
     22#include "pe_scanthru.h"
     23#include "pe_sel.h"
    1924#include "pe_var.h"
    2025#include "pe_xor.h"
    2126
    22 #include <math.h>
     27//Pablo Statements
     28#include "ps_pablos.h"
     29#include "ps_assign.h"
     30#include "ps_if.h"
     31#include "ps_while.h"
     32
     33#include "re/re_alt.h"
     34#include "re/re_cc.h"
     35#include "re/re_seq.h"
     36#include "re/re_rep.h"
     37
    2338#include <utility>
    24 #include <iostream>
    2539#include <string>
    2640#include <list>
     
    3145#include <stdlib.h>
    3246#include <stdexcept>
     47
     48using namespace re;
    3349
    3450CC_Compiler::CC_Compiler(const UTF_Encoding encoding, const std::string basis_pattern, const std::string gensym_pattern)
     
    107123
    108124void CC_Compiler::process_re(const RE* re) {
    109     if (const Alt* re_alt = dynamic_cast<const Alt*>(re)) {
     125    if (const Alt* re_alt = dyn_cast<const Alt>(re)) {
    110126        for (const RE * re : *re_alt) {
    111127            process_re(re);
    112128        }
    113129    }
    114     else if (const CC* re_cc = dynamic_cast<const CC*>(re)) {
     130    else if (const CC* re_cc = dyn_cast<const CC>(re)) {
    115131        cc2pablos(re_cc);
    116132    }
    117     else if (const Rep* re_rep = dynamic_cast<const Rep*>(re)) {
     133    else if (const Rep* re_rep = dyn_cast<const Rep>(re)) {
    118134        process_re(re_rep->getRE());
    119135    }
    120     else if (const Seq* re_seq = dynamic_cast<const Seq*>(re)) {
     136    else if (const Seq* re_seq = dyn_cast<const Seq>(re)) {
    121137        for (const RE * re : *re_seq) {
    122138            process_re(re);
     
    179195PabloE* CC_Compiler::make_range(const CodePointType n1, const CodePointType n2)
    180196{
    181     CodePointType diff_bits = n1 ^ n2;
    182197    CodePointType diff_count = 0;
    183198
    184     while (diff_bits > 0)
    185     {
    186         diff_count++;
    187         diff_bits >>= 1;
    188     }
     199    for (CodePointType diff_bits = n1 ^ n2; diff_bits; diff_count++, diff_bits >>= 1);
    189200
    190201    if ((n2 < n1) || (diff_count > mEncoding.getBits()))
     
    195206    const CodePointType mask0 = (static_cast<CodePointType>(1) << diff_count) - 1;
    196207
    197     PabloE* common = bit_pattern_expr(n1 & ~mask0, mEncoding.getMask() ^ mask0);
     208    PabloE * common = bit_pattern_expr(n1 & ~mask0, mEncoding.getMask() ^ mask0);
     209
    198210    if (diff_count == 0) return common;
    199211
  • icGREP/icgrep-devel/icgrep/cc_compiler.h

    r4187 r4194  
    2222
    2323class CC_Compiler{
    24     typedef std::map<std::string, RE*>          REMap;
     24    typedef std::map<std::string, re::RE*>      REMap;
    2525    typedef std::map<std::string, Expression*>  ExpressionMap;
    2626    typedef ExpressionMap::iterator             MapIterator;
     
    2828public:
    2929    CC_Compiler(const UTF_Encoding encoding, const std::string basis_pattern, const std::string gensym_pattern);
    30     std::string compile1(CC* cc);   
     30    std::string compile1(re::CC* cc);
    3131    void compile_from_map(const REMap & re_map);
    3232    std::list<PabloS*> get_compiled();
    3333private:
    3434    void process_re_map(const REMap &re_map);
    35     void process_re(const RE *re);
     35    void process_re(const re::RE *re);
    3636    std::string bit_var(int n);
    3737    PabloE* make_bitv(int n);
    3838    PabloE* bit_pattern_expr(int pattern, int selected_bits);
    39     PabloE* char_test_expr(const CodePointType ch);
    40     PabloE* make_range(const CodePointType n1, const CodePointType n2);
     39    PabloE* char_test_expr(const re::CodePointType ch);
     40    PabloE* make_range(const re::CodePointType n1, const re::CodePointType n2);
    4141    PabloE* GE_Range(int N, int n);
    4242    PabloE* LE_Range(int N, int n);
    43     PabloE* char_or_range_expr(const CodePointType lo, const CodePointType hi);
    44     PabloE* charset_expr(const CC *cc);
     43    PabloE* char_or_range_expr(const re::CodePointType lo, const re::CodePointType hi);
     44    PabloE* charset_expr(const re::CC *cc);
    4545    Expression* expr2pabloe(PabloE* expr);
    46     void cc2pablos(const CC *cc);
     46    void cc2pablos(const re::CC *cc);
    4747
    4848    UTF_Encoding mEncoding;
  • icGREP/icgrep-devel/icgrep/cc_compiler_helper.cpp

    r4187 r4194  
    66
    77#include "cc_compiler_helper.h"
     8//Pablo Expressions
     9#include "pe_advance.h"
     10#include "pe_all.h"
     11#include "pe_and.h"
     12#include "pe_call.h"
     13#include "pe_charclass.h"
     14#include "pe_matchstar.h"
     15#include "pe_not.h"
     16#include "pe_or.h"
     17#include "pe_pabloe.h"
     18#include "pe_scanthru.h"
     19#include "pe_sel.h"
     20#include "pe_var.h"
     21#include "pe_xor.h"
    822
    923CC_Compiler_Helper::CC_Compiler_Helper(){}
  • icGREP/icgrep-devel/icgrep/icgrep-devel.files

    r4182 r4194  
    108108utf_encoding.cpp
    109109utf_encoding.h
     110re/parsefailure.cpp
     111re/parsefailure.h
     112re/re_alt.cpp
     113re/re_alt.h
     114re/re_cc.cpp
     115re/re_cc.h
     116re/re_compiler.cpp
     117re/re_compiler.h
     118re/re_end.cpp
     119re/re_end.h
     120re/re_name.cpp
     121re/re_name.h
     122re/re_nullable.cpp
     123re/re_nullable.h
     124re/re_parser.cpp
     125re/re_parser.h
     126re/re_re.cpp
     127re/re_re.h
     128re/re_reducer.cpp
     129re/re_reducer.h
     130re/re_rep.cpp
     131re/re_rep.h
     132re/re_seq.cpp
     133re/re_seq.h
     134re/re_simplifier.cpp
     135re/re_simplifier.h
     136re/re_start.cpp
     137re/re_start.h
  • icGREP/icgrep-devel/icgrep/icgrep-devel.includes

    r4182 r4194  
    11.
     2re
  • icGREP/icgrep-devel/icgrep/icgrep.cpp

    r4187 r4194  
    6565};
    6666
    67 #include <simd-lib/transpose.hpp>
     67#include "include/simd-lib/transpose.hpp"
    6868
    6969using namespace std;
     
    232232    encoding.setMask(0xFF);
    233233
    234     RE_Compiler* re_compiler = new RE_Compiler();
     234    auto * re_compiler = new re::RE_Compiler();
    235235    if (compile_time_option)
    236236    {
  • icGREP/icgrep-devel/icgrep/llvm_gen.cpp

    r4151 r4194  
    66
    77#include "llvm_gen.h"
     8//Pablo Expressions
     9#include "pe_advance.h"
     10#include "pe_all.h"
     11#include "pe_and.h"
     12#include "pe_call.h"
     13#include "pe_charclass.h"
     14#include "pe_matchstar.h"
     15#include "pe_not.h"
     16#include "pe_or.h"
     17#include "pe_pabloe.h"
     18#include "pe_scanthru.h"
     19#include "pe_sel.h"
     20#include "pe_var.h"
     21#include "pe_xor.h"
     22
     23//Pablo Statements
     24#include "ps_pablos.h"
     25#include "ps_assign.h"
     26#include "ps_if.h"
     27#include "ps_while.h"
    828
    929Ps* ps = NULL; Nl* nl = NULL; No* no = NULL; Lo* lo = NULL; Ll* ll = NULL; Lm* lm = NULL; Nd* nd = NULL;
  • icGREP/icgrep-devel/icgrep/llvm_gen.h

    r4187 r4194  
    9494#include "llvm/IR/IRBuilder.h"
    9595
    96 #include <simd-lib/bitblock.hpp>
     96#include "include/simd-lib/bitblock.hpp"
    9797
    9898using namespace llvm;
  • icGREP/icgrep-devel/icgrep/pbix_compiler.cpp

    r4187 r4194  
    66
    77#include "pbix_compiler.h"
    8 #include "printer_pablos.h"
     8//Regular Expressions
     9#include "re/re_name.h"
     10#include "re/re_start.h"
     11#include "re/re_end.h"
     12#include "re/re_seq.h"
     13#include "re/re_alt.h"
     14#include "re/re_rep.h"
     15
     16//Pablo Expressions
     17#include "pe_pabloe.h"
     18#include "pe_sel.h"
     19#include "pe_advance.h"
     20#include "pe_all.h"
     21#include "pe_and.h"
     22#include "pe_charclass.h"
     23#include "pe_call.h"
     24#include "pe_matchstar.h"
     25#include "pe_scanthru.h"
     26#include "pe_not.h"
     27#include "pe_or.h"
     28#include "pe_var.h"
     29#include "pe_xor.h"
     30
     31//Pablo Statements
     32#include "ps_pablos.h"
     33#include "ps_assign.h"
     34#include "ps_if.h"
     35#include "ps_while.h"
     36
     37#include <assert.h>
     38
     39using namespace re;
    940
    1041Pbix_Compiler::Pbix_Compiler(std::map<std::string, std::string> name_map)
     
    1748{
    1849    CodeGenState cg_state;
    19 
    20     for (auto it =  re_map.rbegin(); it != re_map.rend(); ++it)
    21     {
     50    for (auto i =  re_map.rbegin(); i != re_map.rend(); ++i) {
    2251        //This is specifically for the utf8 multibyte character classes.
    23         if (Seq* seq = dynamic_cast<Seq*>(it->second))
    24         {
    25             if (seq->getType() == Seq::Byte)
    26             {
     52        if (Seq * seq = dyn_cast<Seq>(i->second)) {
     53            if (seq->getType() == Seq::Type::Byte) {
    2754                std::string gs_retVal = symgen.gensym("start_marker");
    28                 cg_state.stmtsl.push_back(new Assign(gs_retVal, new All(1)));
    29                 cg_state.newsym = gs_retVal;
    30 
    31                 auto endit = seq->end();
    32                 --endit;
    33 
    34                 for (auto it = seq->begin(); it != seq->end(); ++it)
    35                 {
    36                     Name* name = dynamic_cast<Name*>(*it);
    37                     if (it != endit)
    38                     {
     55                cg_state.stmtsl.push_back(new Assign(gs_retVal, new All(1)));               
     56                for (auto j = seq->begin();; ) {
     57                    Name * name = dyn_cast<Name>(*j);
     58                    assert (name);
     59                    And * cc_mask = new And(new Var(gs_retVal), new CharClass(name->getName()));
     60                    if (++j != seq->end()) {
    3961                        gs_retVal = symgen.gensym("marker");
    40                         cg_state.stmtsl.push_back(new Assign(gs_retVal, new Advance(new And(new Var(cg_state.newsym), new CharClass(name->getName())))));
    41                         cg_state.newsym = gs_retVal;
     62                        cg_state.stmtsl.push_back(new Assign(gs_retVal, new Advance(cc_mask)));
    4263                    }
    43                     else
    44                     {
    45                         cg_state.stmtsl.push_back(new Assign(seq->getName(), new And(new Var(cg_state.newsym), new CharClass(name->getName()))));
     64                    else {
     65                        cg_state.stmtsl.push_back(new Assign(seq->getName(), cc_mask));
     66                        break;
    4667                    }
    4768                }
    48             }
    49         }
    50     }
    51 
     69                cg_state.newsym = gs_retVal;
     70            }
     71        }
     72    }
    5273    return cg_state;
    5374}
     
    6081    cg_state.stmtsl.push_back(new Assign(gs_m0, new All(1)));
    6182
    62     if (unicode_re(re))
     83    if (hasUnicode(re))
    6384    {
    6485        cg_state.newsym = gs_m0;
     
    7192        PabloE * u8pfx4 = new Var(m_name_map.find("UTF8-Prefix4")->second);
    7293        PabloE * u8pfx = new Or(new Or(u8pfx2, u8pfx3), u8pfx4);
    73         cg_state.stmtsl.push_back(new Assign(gs_initial, new Or(u8pfx, u8single)));
     94        cg_state.stmtsl.push_back(new Assign(gs_initial, new Or(u8pfx, u8single)));
    7495        cg_state.newsym = gs_initial;
    7596
     
    7899        std::string gs_nonfinal = symgen.gensym("internal.nonfinal");
    79100        m_name_map.insert(make_pair("internal.nonfinal", gs_nonfinal));
    80 //#define USE_IF_FOR_NONFINAL
    81 #ifdef USE_IF_FOR_NONFINAL
     101        //#define USE_IF_FOR_NONFINAL
     102        #ifdef USE_IF_FOR_NONFINAL
    82103        cg_state.stmtsl.push_back(new Assign(gs_nonfinal, new All(0)));
    83 #endif
     104        #endif
    84105        PabloE * u8scope32 = new Advance(u8pfx3);
    85106        PabloE * u8scope42 = new Advance(u8pfx4);
    86107        PabloE * u8scope43 = new Advance(u8scope42);
    87108        PabloS * assign_non_final = new Assign(gs_nonfinal, new Or(new Or(u8pfx, u8scope32), new Or(u8scope42, u8scope43)));
    88 #ifdef USE_IF_FOR_NONFINAL
     109        #ifdef USE_IF_FOR_NONFINAL
    89110        std::list<PabloS *> * if_body = new std::list<PabloS *> ();
    90111        if_body->push_back(assign_non_final);
    91112        cg_state.stmtsl.push_back(new If(u8pfx, *if_body));
    92 #endif
    93 #ifndef USE_IF_FOR_NONFINAL
     113        #else
    94114        cg_state.stmtsl.push_back(assign_non_final);
    95 #endif
    96         cg_state.newsym = gs_nonfinal;
     115        #endif
     116        cg_state.newsym = gs_nonfinal;
    97117    }
    98118
     
    111131CodeGenState Pbix_Compiler::re2pablo_helper(RE *re, CodeGenState cg_state)
    112132{
    113     if (Name* name = dynamic_cast<Name*>(re))
     133    if (Name* name = dyn_cast<Name>(re))
    114134    {
    115135        std::string gs_retVal = symgen.gensym("marker");
    116136        PabloE* markerExpr = new Var(cg_state.newsym);
    117         if (name->getType() != Name::FixedLength) {
     137        if (name->getType() != Name::Type::FixedLength) {
    118138            // Move the markers forward through any nonfinal UTF-8 bytes to the final position of each character.
    119139            markerExpr = new And(markerExpr, new CharClass(m_name_map.find("internal.initial")->second));
     
    121141        }       
    122142        PabloE* ccExpr;
    123         if (name->getType() == Name::UnicodeCategory)
     143        if (name->getType() == Name::Type::UnicodeCategory)
    124144        {
    125145            ccExpr = new Call(name->getName());
     
    135155        cg_state.stmtsl.push_back(new Assign(gs_retVal, new Advance(new And(ccExpr, markerExpr))));
    136156        cg_state.newsym = gs_retVal;
    137 
    138         //std::cout << "\n" << "(" << StatementPrinter::PrintStmts(cg_state) << ")" << "\n" << std::endl;
    139     }
    140     else if (dynamic_cast<Start*>(re))
     157    }
     158    else if (isa<Start>(re))
    141159    {
    142160        std::string gs_retVal = symgen.gensym("start_of_line_marker");
     
    144162        cg_state.newsym = gs_retVal;
    145163    }
    146     else if (dynamic_cast<End*>(re))
     164    else if (isa<End>(re))
    147165    {
    148166        std::string gs_retVal = symgen.gensym("end_of_line_marker");
     
    150168        cg_state.newsym = gs_retVal;
    151169    }
    152     else if (Seq* seq = dynamic_cast<Seq*>(re))
     170    else if (Seq* seq = dyn_cast<Seq>(re))
    153171    {
    154172        if (!seq->empty())
     
    157175        }
    158176    }
    159     else if (Alt* alt = dynamic_cast<Alt*>(re))
     177    else if (Alt* alt = dyn_cast<Alt>(re))
    160178    {
    161179        if (alt->empty())
    162180        {
    163 
    164181            std::string gs_retVal = symgen.gensym("always_fail_marker");
    165182            cg_state.stmtsl.push_back(new Assign(gs_retVal, new All(0)));
     
    179196
    180197    }
    181     else if (Rep* rep = dynamic_cast<Rep*>(re))
    182     {
    183         if ((dynamic_cast<Name*>(rep->getRE()) != 0) && (rep->getLB() == 0) && (rep->getUB()== Rep::UNBOUNDED_REP))
    184         {
    185             Name* rep_name = dynamic_cast<Name*>(rep->getRE());
     198    else if (Rep* rep = dyn_cast<Rep>(re))
     199    {
     200        if (isa<Name>(rep->getRE()) && (rep->getLB() == 0) && (rep->getUB()== Rep::UNBOUNDED_REP))
     201        {
     202            Name* rep_name = dyn_cast<Name>(rep->getRE());
    186203            std::string gs_retVal = symgen.gensym("marker");
    187204
    188205            PabloE* ccExpr;
    189             if (rep_name->getType() == Name::UnicodeCategory)
     206            if (rep_name->getType() == Name::Type::UnicodeCategory)
    190207            {
    191208                ccExpr = new Call(rep_name->getName());
     
    200217                                        new CharClass(m_name_map.find("internal.nonfinal")->second)));
    201218            }
    202             if (rep_name->getType() == Name::FixedLength)
     219            if (rep_name->getType() == Name::Type::FixedLength)
    203220            {
    204221                cg_state.stmtsl.push_back(new Assign(gs_retVal, new MatchStar(new Var(cg_state.newsym), ccExpr)));
     
    262279    if (lb == 0)
    263280    {
    264          //std::cout << "While, no lb." << std::endl;
    265 
    266281         std::string while_test_gs_retVal = symgen.gensym("while_test");
    267282         std::string while_accum_gs_retVal = symgen.gensym("while_accum");
     
    310325
    311326
    312 bool Pbix_Compiler::unicode_re(RE *re)
    313 {
     327bool Pbix_Compiler::hasUnicode(const RE * re) {
    314328    bool found = false;
    315 
    316     return unicode_re_helper(re, found);
    317 }
    318 
    319 bool Pbix_Compiler::unicode_re_helper(RE *re, bool found)
    320 {
    321     if (!found)
    322     {
    323         if (Name* name = dynamic_cast<Name*>(re))
    324         {
    325             if ((name->getType() == Name::UnicodeCategory) || (name->getType() == Name::Unicode))
    326             {
     329    if (re == nullptr) {
     330        throw std::runtime_error("Unexpected Null Value passed to RE Compiler!");
     331    }
     332    else if (const Name * name = dyn_cast<const Name>(re)) {
     333        if ((name->getType() == Name::Type::UnicodeCategory) || (name->getType() == Name::Type::Unicode)) {
     334            found = true;
     335        }
     336    }
     337    else if (const Seq * re_seq = dyn_cast<const Seq>(re)) {
     338        for (auto i = re_seq->cbegin(); i != re_seq->cend(); ++i) {
     339            if (hasUnicode(*i)) {
    327340                found = true;
    328             }
    329         }
    330         else if (Seq* re_seq = dynamic_cast<Seq*>(re))
    331         {
    332             for (auto it = re_seq->begin(); it != re_seq->end(); ++it)
    333             {
    334                 found = unicode_re_helper(*it, found);
    335                 if (found) break;
    336             }
    337         }
    338         else if (Alt* re_alt = dynamic_cast<Alt*>(re))
    339         {
    340             for (auto it = re_alt->begin(); it != re_alt->end(); ++it)
    341             {
    342                 found = unicode_re_helper(*it, found);
    343                 if (found) break;
    344             }
    345         }
    346         else if (Rep* rep = dynamic_cast<Rep*>(re))
    347         {
    348             found = unicode_re_helper(rep->getRE(), found);
    349         }
    350     }
    351 
     341                break;
     342            }
     343        }
     344    }
     345    else if (const Alt * re_alt = dyn_cast<const Alt>(re)) {
     346        for (auto i = re_alt->cbegin(); i != re_alt->cend(); ++i) {
     347            if (hasUnicode(*i)) {
     348                found = true;
     349                break;
     350            }
     351        }
     352    }
     353    else if (const Rep * rep = dyn_cast<const Rep>(re)) {
     354        found = hasUnicode(rep->getRE());
     355    }
    352356    return found;
    353357}
  • icGREP/icgrep-devel/icgrep/pbix_compiler.h

    r4187 r4194  
    1010//Regular Expressions
    1111#include "re/re_re.h"
    12 #include "re/re_name.h"
    13 #include "re/re_start.h"
    14 #include "re/re_end.h"
    15 #include "re/re_seq.h"
    16 #include "re/re_alt.h"
    17 #include "re/re_rep.h"
    18 
    19 //Pablo Expressions
    20 #include "pe_pabloe.h"
    21 #include "pe_sel.h"
    22 #include "pe_advance.h"
    23 #include "pe_all.h"
    24 #include "pe_and.h"
    25 #include "pe_charclass.h"
    26 #include "pe_call.h"
    27 #include "pe_matchstar.h"
    28 #include "pe_scanthru.h"
    29 #include "pe_not.h"
    30 #include "pe_or.h"
    31 #include "pe_var.h"
    32 #include "pe_xor.h"
    33 
    3412//Pablo Statements
    3513#include "ps_pablos.h"
    36 #include "ps_assign.h"
    37 #include "ps_if.h"
    38 #include "ps_while.h"
    39 
    4014//Code Generation
    4115#include "symbol_generator.h"
    4216
    43 #include <iostream>
    4417#include <string>
    45 #include <sstream>
    4618#include <list>
    47 #include <vector>
    4819#include <map>
    4920
     
    5627class Pbix_Compiler
    5728{
    58     typedef RE::Vector              Vector;
     29    typedef re::RE                  RE;
     30    typedef re::Vector              Vector;
    5931    typedef Vector::const_iterator  const_iterator;
    6032
     
    7042    CodeGenState BoundedRep_helper(RE* repeated, int lb, int ub, CodeGenState cg_state);
    7143
    72     bool unicode_re(RE* re);
    73     bool unicode_re_helper(RE* re, bool found);
     44    static bool hasUnicode(const RE *re);
    7445
    7546    SymbolGenerator symgen;
  • icGREP/icgrep-devel/icgrep/printer_pablos.cpp

    r4086 r4194  
    66
    77#include "printer_pablos.h"
     8
     9//Regular Expressions
     10#include "re/re_re.h"
     11#include "re/re_cc.h"
     12#include "re/re_start.h"
     13#include "re/re_end.h"
     14#include "re/re_seq.h"
     15#include "re/re_name.h"
     16
     17//Pablo Expressions
     18#include "pe_advance.h"
     19#include "pe_all.h"
     20#include "pe_and.h"
     21#include "pe_call.h"
     22#include "pe_charclass.h"
     23#include "pe_matchstar.h"
     24#include "pe_not.h"
     25#include "pe_or.h"
     26#include "pe_pabloe.h"
     27#include "pe_scanthru.h"
     28#include "pe_sel.h"
     29#include "pe_var.h"
     30#include "pe_xor.h"
     31
     32//Pablo Statements
     33#include "ps_pablos.h"
     34#include "ps_assign.h"
     35#include "ps_if.h"
     36#include "ps_while.h"
    837
    938
     
    109138        retVal = "CharClass '" + cc->getCharClass() + "'";
    110139    }
    111     else if (Name* name = dynamic_cast<Name*>(expr))
     140    else if (re::Name * name = dynamic_cast<re::Name *>(expr))
    112141    {
    113142        retVal = "Name '" + name->getName() + "'";
  • icGREP/icgrep-devel/icgrep/printer_pablos.h

    r4187 r4194  
    88#define SHOW_H
    99
    10 //Regular Expressions
    11 #include "re/re_re.h"
    12 #include "re/re_cc.h"
    13 #include "re/re_start.h"
    14 #include "re/re_end.h"
    15 #include "re/re_seq.h"
    16 
    17 //Pablo Expressions
    18 #include "pe_pabloe.h"
    19 #include "pe_sel.h"
    20 #include "pe_advance.h"
    21 #include "pe_all.h"
    22 #include "pe_and.h"
    23 #include "pe_charclass.h"
    24 #include "pe_not.h"
    25 #include "pe_or.h"
    26 #include "pe_var.h"
    27 #include "pe_xor.h"
    28 #include "pe_matchstar.h"
    29 
    30 //Pablo Statements
    31 #include "ps_pablos.h"
    32 #include "ps_assign.h"
    33 #include "ps_if.h"
    34 
    3510#include "pbix_compiler.h"
    3611
    37 //Code Generation
    38 #include "symbol_generator.h"
    39 
    40 #include <iostream>
    4112#include <string>
    42 #include <sstream>
    4313#include <list>
    4414
  • icGREP/icgrep-devel/icgrep/printer_re.cpp

    r4187 r4194  
    1717#include "re/re_start.h"
    1818
     19using namespace re;
    1920
    2021const std::string Printer_RE::PrintRE(const RE * re)
     
    2526        retVal = "--> RE NullPtr! <--";
    2627    }
    27     else if (const Alt* re_alt = dynamic_cast<const Alt*>(re))
     28    else if (const Alt* re_alt = dyn_cast<const Alt>(re))
    2829    {
    2930        retVal += "(Alt[";
     
    3839        retVal += "])";
    3940    }
    40     else if (const CC* re_cc = dynamic_cast<const CC*>(re))
     41    else if (const CC* re_cc = dyn_cast<const CC>(re))
    4142    {
    4243        retVal = "CC \"";
     
    5253        }
    5354    }
    54     else if (const Name* re_name = dynamic_cast<const Name*>(re))
     55    else if (const Name* re_name = dyn_cast<const Name>(re))
    5556    {
    5657        retVal = "Name \"";
     
    5859        retVal += "\" ";
    5960    }
    60     else if (dynamic_cast<const End*>(re))
     61    else if (isa<const End>(re))
    6162    {
    6263        retVal = "End";
    6364    }
    64     else if (const Rep* re_rep = dynamic_cast<const Rep*>(re))
     65    else if (const Rep* re_rep = dyn_cast<const Rep>(re))
    6566    {
    6667        retVal = "Rep(";
     
    7778        retVal.append(")");
    7879    }
    79     else if (const Seq* re_seq = dynamic_cast<const Seq*>(re))
     80    else if (const Seq* re_seq = dyn_cast<const Seq>(re))
    8081    {
    8182        retVal = "(Seq[";
     
    9091        retVal.append("])");
    9192    }
    92     else if (dynamic_cast<const Start*>(re))
     93    else if (isa<const Start>(re))
    9394    {
    9495        retVal = "Start";
  • icGREP/icgrep-devel/icgrep/printer_re.h

    r4187 r4194  
    1010#include <string>
    1111
    12 class RE;
     12namespace re {
     13    class RE;
     14}
    1315
    1416class Printer_RE
    1517{
    1618public:
    17     static const std::string PrintRE(const RE *re);
     19    static const std::string PrintRE(const re::RE *re);
    1820};
    1921
  • icGREP/icgrep-devel/icgrep/re/re_alt.cpp

    r4187 r4194  
    66
    77#include "re_alt.h"
    8 
    9 Alt::Alt()
    10 {
    11 
    12 }
    13 
    14 Alt::Alt(iterator begin, iterator end)
    15 : std::vector<RE*>(begin, end)
    16 {
    17 
    18 }
    19 
    20 Alt::~Alt()
    21 {
    22     for (RE * re : *this) {
    23         delete re;
    24     }
    25 }
  • icGREP/icgrep-devel/icgrep/re/re_alt.h

    r4187 r4194  
    99
    1010#include "re_re.h"
    11 #include <algorithm>
    12 #include <list>
    1311
     12namespace re {
    1413
    15 class Alt : public RE, public RE::Vector {
     14class Alt : public Vector {
    1615public:
    17     typedef RE::Vector Vector;
    18     Alt();   
    19     Alt(iterator begin, iterator end);
    20     virtual ~Alt();
     16    static inline bool classof(const RE * re) {
     17        return re->getClassTypeId() == ClassTypeId::Alt;
     18    }
     19    static inline bool classof(const void *) {
     20        return false;
     21    }
     22    virtual RE * clone() const {
     23        return new Alt(*this);
     24    }
     25protected:
     26    friend Alt * makeAlt();
     27    friend Alt * makeAlt(Alt::iterator, Alt::iterator);
     28    Alt()
     29    : Vector(ClassTypeId::Alt) {
     30
     31    }
     32    Alt(const Alt & alt)
     33    : Vector(ClassTypeId::Alt, alt.cbegin(), alt.cend(), true) {
     34
     35    }
     36    Alt(iterator begin, iterator end)
     37    : Vector(ClassTypeId::Alt, begin, end) {
     38
     39    }
    2140};
     41
     42inline Alt * makeAlt() {
     43    return new Alt();
     44}
     45
     46inline Alt * makeAlt(Alt::iterator begin, Alt::iterator end) {
     47    return new Alt(begin, end);
     48}
     49
     50}
    2251
    2352#endif // ALT_H
  • icGREP/icgrep-devel/icgrep/re/re_cc.cpp

    r4187 r4194  
    66
    77#include "re_cc.h"
    8 #include <assert.h>
    9 #include <atomic>
    108
    11 CC::CC() {
     9namespace re {
    1210
     11CC::CC(const CC * cc1, const CC * cc2)
     12: RE(ClassTypeId::CC)
     13, mSparseCharSet(cc1->cbegin(), cc1->cend()) {
     14    for (const CharSetItem & i : cc2->mSparseCharSet) {
     15        insert_range(i.lo_codepoint, i.hi_codepoint);
     16    }
    1317}
    1418
    15 CC::CC(const CodePointType codepoint) {
    16     insert(codepoint);
    17 }
    18 
    19 CC::CC(const CodePointType lo_codepoint, const CodePointType hi_codepoint) {
    20     insert_range(lo_codepoint, hi_codepoint);
    21 }
    22 
    23 CC::CC(const CC * cc1, const CC * cc2) {
    24     mSparseCharSet.assign(cc1->cbegin(), cc1->cend());
    25     join(cc2->mSparseCharSet);
    26 }
    27 
    28 CC::~CC() {
     19CC::CC(const CC & cc)
     20: RE(ClassTypeId::CC)
     21, mSparseCharSet(cc.cbegin(), cc.cend()) {
    2922
    3023}
     
    3730    }
    3831    return name;
    39 }
    40 
    41 void CC::join(const CharSetVector & other) {
    42     for (const CharSetItem & i : other) {
    43         insert_range(i.lo_codepoint, i.hi_codepoint);
    44     }
    4532}
    4633
     
    10996    }
    11097}
     98
     99}
  • icGREP/icgrep-devel/icgrep/re/re_cc.h

    r4187 r4194  
    1010#include "re_re.h"
    1111
    12 #include <iostream>
    1312#include <string>
    14 #include <sstream>
    15 #include <utility>
    1613#include <vector>
     14
     15namespace re {
    1716
    1817typedef int CodePointType;
     
    3029public:
    3130
     31    static inline bool classof(const RE * re) {
     32        return re->getClassTypeId() == ClassTypeId::CC;
     33    }
     34    static inline bool classof(const void *) {
     35        return false;
     36    }
     37    virtual RE * clone() const {
     38        return new CC(*this);
     39    }
     40
    3241    typedef CharSetVector::iterator                 iterator;
    3342    typedef CharSetVector::const_iterator           const_iterator;
     
    3746
    3847    static const CodePointType UNICODE_MAX = 0x10FFFF;
    39     CC();
    40     CC(const CodePointType codepoint);
    41     CC(const CodePointType lo_codepoint, const CodePointType hi_codepoint);
    42     CC(const CC * cc1, const CC * cc2);
    43     ~CC();
     48
    4449    std::string getName() const;
     50
    4551    void insert_range(const CodePointType lo_codepoint, const CodePointType hi_codepoint);
     52
    4653    void negate();
     54
    4755    void remove_range(const CodePointType lo_codepoint, const CodePointType hi_codepoint);
    4856
     
    7179    }
    7280
     81    inline const_iterator begin() const {
     82        return mSparseCharSet.cbegin();
     83    }
     84
     85    inline const_iterator end() const {
     86        return mSparseCharSet.cend();
     87    }
     88
    7389    inline const_iterator cbegin() const {
    7490        return mSparseCharSet.cbegin();
     
    95111    }
    96112
     113    virtual ~CC() {}
     114
     115protected:
     116
     117    inline CC()
     118    : RE(ClassTypeId::CC) {
     119
     120    }
     121    CC(const CC & cc);
     122    inline CC(const CodePointType codepoint)
     123    : RE(ClassTypeId::CC) {
     124        insert(codepoint);
     125    }
     126    inline CC(const CodePointType lo_codepoint, const CodePointType hi_codepoint)
     127    : RE(ClassTypeId::CC) {
     128        insert_range(lo_codepoint, hi_codepoint);
     129    }
     130
     131    CC(const CC * cc1, const CC * cc2);
     132
     133    friend CC * makeCC();
     134    friend CC * makeCC(const CodePointType codepoint);
     135    friend CC * makeCC(const CodePointType lo, const CodePointType hi);
     136    friend CC * makeCC(const CC * cc1, const CC * cc2);
     137
    97138private:   
    98     void join(const CharSetVector & other);
    99139    CharSetVector mSparseCharSet;
    100140};
     
    116156}
    117157
     158/**
     159 * @brief makeCC
     160 *
     161 * Factory functions for the RE CC class: each overload forwards its arguments to the matching CC constructor via `new`.
     162 *
     163 * @return a pointer to the newly allocated CC object
     164 */
     165
     // Factory: CC built by the default constructor, which inserts no codepoints.
     166inline CC * makeCC() {
     167    return new CC();
     168}
     169
     // Factory: CC built by the single-codepoint constructor, which inserts that codepoint.
     170inline CC * makeCC(const CodePointType codepoint) {
     171    return new CC(codepoint);
     172}
     173
     // Factory: CC built by the range constructor, which calls insert_range(lo, hi).
     174inline CC * makeCC(const CodePointType lo, const CodePointType hi) {
     175    return new CC(lo, hi);
     176}
     177
     // Factory: CC built from two existing CCs.
     // NOTE(review): the two-CC constructor is defined outside this header view; presumably it combines both classes -- confirm.
     178inline CC * makeCC(const CC * cc1, const CC * cc2) {
     179    return new CC(cc1, cc2);
     180}
     181
     182}
    118183
    119184#endif // RE_CC_H
  • icGREP/icgrep-devel/icgrep/re/re_compiler.cpp

    r4187 r4194  
    3232//#include "pbix_counter.h"
    3333
     34// #define DEBUG_PRINT_RE_AST
     35// #define DEBUG_PRINT_PBIX_AST
     36
     37namespace re {
    3438
    3539RE_Compiler::RE_Compiler(){}
     
    5963    }
    6064
     65    #ifdef DEBUG_PRINT_RE_AST
    6166    //Print to the terminal the AST that was generated by the parser before adding the UTF encoding:
    62     // std::cerr << "Parser:" << std::endl << Printer_RE::PrintRE(re_ast) << std::endl;
     67    std::cerr << "Parser:" << std::endl << Printer_RE::PrintRE(re_ast) << std::endl;
     68    #endif
    6369
    6470    //Add the UTF encoding.
     
    7682    }
    7783
     84    #ifdef DEBUG_PRINT_RE_AST
    7885    //Print to the terminal the AST that was generated by the utf8 encoder.
    79     // std::cerr << "UTF8-encoder:" << std::endl << Printer_RE::PrintRE(re_ast) << std::endl;
     86    std::cerr << "UTF8-encoder:" << std::endl << Printer_RE::PrintRE(re_ast) << std::endl;
     87    #endif
    8088
    8189    //Optimization passes to simplify the AST.
     
    8694    re_ast = RE_Simplifier::simplify(re_ast);
    8795
     96    #ifdef DEBUG_PRINT_RE_AST
    8897    //Print to the terminal the AST that was generated by the simplifier.
    89     // std::cout << "\nSimplifier:\n" + Printer_RE::PrintRE(re_ast) + "\n" << std::endl;
     98    std::cerr << "Simplifier:" << std::endl << Printer_RE::PrintRE(re_ast) << std::endl;
     99    #endif
    90100
    91101    //Map all of the unique character classes in order to reduce redundancy.
     
    93103    re_ast = RE_Reducer::reduce(re_ast, re_map);
    94104
     105    #ifdef DEBUG_PRINT_RE_AST
    95106    //Print to the terminal the AST with the reduced REs.
    96     // std::cerr << "Reducer:" << std::endl << Printer_RE::PrintRE(re_ast) << std::endl;
     107    std::cerr << "Reducer:" << std::endl << Printer_RE::PrintRE(re_ast) << std::endl;
     108    #endif
    97109
    98110    //Build our list of predefined characters.
     
    100112    std::map<std::string,std::string> name_map;
    101113
    102     CC* cc_lf = new CC(0x0A);
     114    CC* cc_lf = makeCC(0x0A);
    103115    cc_name = cc_lf->getName();
    104116    re_map.insert(make_pair(cc_name, cc_lf));
    105117    name_map.insert(make_pair("LineFeed", cc_name));
    106118
    107     CC* cc_utf8_single_byte = new CC(0x00, 0x7F);
     119    CC* cc_utf8_single_byte = makeCC(0x00, 0x7F);
    108120    cc_name = cc_utf8_single_byte->getName();
    109121    re_map.insert(make_pair(cc_name, cc_utf8_single_byte));
    110122    name_map.insert(make_pair("UTF8-SingleByte", cc_name));
    111123
    112     CC* cc_utf8_prefix2 = new CC(0xC2, 0xDF);
     124    CC* cc_utf8_prefix2 = makeCC(0xC2, 0xDF);
    113125    cc_name = cc_utf8_prefix2->getName();
    114126    re_map.insert(make_pair(cc_name, cc_utf8_prefix2));
    115127    name_map.insert(make_pair("UTF8-Prefix2", cc_name));
    116128
    117     CC* cc_utf8_prefix3 = new CC(0xE0, 0xEF);
     129    CC* cc_utf8_prefix3 = makeCC(0xE0, 0xEF);
    118130    cc_name = cc_utf8_prefix3->getName();
    119131    re_map.insert(make_pair(cc_name, cc_utf8_prefix3));
    120132    name_map.insert(make_pair("UTF8-Prefix3", cc_name));
    121133
    122     CC* cc_utf8_prefix4 = new CC(0xF0, 0xF4);
     134    CC* cc_utf8_prefix4 = makeCC(0xF0, 0xF4);
    123135    cc_name = cc_utf8_prefix4->getName();
    124136    re_map.insert(make_pair(cc_name, cc_utf8_prefix4));
     
    127139    cc_compiler.compile_from_map(re_map);
    128140    std::list<PabloS*> cc_stmtsl = cc_compiler.get_compiled();
     141    #ifdef DEBUG_PRINT_PBIX_AST
    129142    //Print to the terminal the AST that was generated by the character class compiler.
    130     // std::cerr << "CC AST:" << std::endl << StatementPrinter::Print_CC_PabloStmts(cc_stmtsl) << std::endl;
     143    std::cerr << "Pablo CC AST:" << std::endl << StatementPrinter::Print_CC_PabloStmts(cc_stmtsl) << std::endl;
     144    #endif
    131145
    132146    Pbix_Compiler pbix_compiler(name_map);
    133147    CodeGenState re_subexpression_cg_state = pbix_compiler.compile_subexpressions(re_map);
     148    #ifdef DEBUG_PRINT_PBIX_AST
    134149    //Print to the terminal the AST that was generated for the re subexpressions.
    135     // std::cerr << "\n" << "Subexpressions: (" << StatementPrinter::PrintStmts(re_subexpression_cg_state) << ")" << std::endl;
     150    std::cerr << "Subexpressions:" << StatementPrinter::PrintStmts(re_subexpression_cg_state) << std::endl;
     151    #endif
    136152
    137153    CodeGenState re_cg_state = pbix_compiler.compile(re_ast);
     154    #ifdef DEBUG_PRINT_PBIX_AST
    138155    //Print to the terminal the AST that was generated by the parallel bit-stream compiler.
    139     // std::cerr << "\n" << "(" << StatementPrinter::PrintStmts(re_cg_state) << ")" << "\n" << std::endl;
    140 
     156    std::cerr << "Final Pablo AST:" << StatementPrinter::PrintStmts(re_cg_state) << ")" << std::endl;
    141157    //Print a count of the Pablo statements and expressions that are contained in the AST from the pbix compiler.
    142     //std::cout << "\nPablo Statement Count: " << Pbix_Counter::Count_PabloStatements(re_cg_state.stmtsl) <<  "\n" << std::endl;
     158    // std::cerr << "Pablo Statement Count: " << Pbix_Counter::Count_PabloStatements(re_cg_state.stmtsl) << std::endl;
     159    #endif
    143160
    144161    LLVM_Generator irgen(name_map, basis_pattern, encoding.getBits());
     
    164181}
    165182
     183}
  • icGREP/icgrep-devel/icgrep/re/re_compiler.h

    r4187 r4194  
    1212#include "../llvm_gen.h"
    1313#include <string>
     14
     15namespace re {
    1416
    1517struct processed_parsetree_results{
     
    3032};
    3133
     34}
     35
    3236#endif // RE_COMPILER_H
  • icGREP/icgrep-devel/icgrep/re/re_end.cpp

    r4187 r4194  
    66
    77#include "re_end.h"
    8 
    9 End::End()
    10 {
    11     mCC = new CC();
    12 }
    13 
    14 CC* End::getCC()
    15 {
    16     return mCC;
    17 }
    18 
    19 End::~End(){}
  • icGREP/icgrep-devel/icgrep/re/re_end.h

    r4187 r4194  
    99
    1010#include "re_re.h"
    11 #include "re_cc.h"
    1211
    13 class End : public RE
    14 {
     12namespace re {
     13
     14class End : public RE {
    1515public:
    16     End();
    17     CC* getCC();
    18     ~End();
    19 private:
    20     CC* mCC;
     16    static inline bool classof(const RE * re) {
     17        return re->getClassTypeId() == ClassTypeId::End;
     18    }
     19    static inline bool classof(const void *) {
     20        return false;
     21    }
     22    virtual RE * clone() const {
     23        return new End();
     24    }
     25    virtual ~End() {}
     26protected:
     27    friend End * makeEnd();
     28    End() : RE(ClassTypeId::End) {}
    2129};
    2230
     // Factory for End nodes: returns a pointer to an End allocated with `new`.
     31inline End * makeEnd() {
     32    return new End();
     33}
     34
     35}
     36
    2337#endif // END_H
  • icGREP/icgrep-devel/icgrep/re/re_name.cpp

    r4187 r4194  
    11#include "re_name.h"
    2 
    3 Name::Name()
    4 : mName()
    5 , mNegated(false)
    6 , mType(Name::FixedLength) {
    7     mName = "";
    8     mNegated = false;
    9     mType = Name::FixedLength;
    10 }
    11 
    12 Name::Name(std::string name)
    13 : mName(name)
    14 , mNegated(false)
    15 , mType(Name::FixedLength) {
    16 
    17 }
    18 
    19 Name::Name(const Name * name)
    20 : mName(name->getName())
    21 , mNegated(name->isNegated())
    22 , mType(name->getType()) {
    23 
    24 }
    25 
    26 Name::~Name(){
    27 
    28 }
    29 
    30 void Name::setName(std::string name) {
    31     mName = name;
    32 }
    33 
    34 std::string Name::getName() const {
    35     return mName;
    36 }
    37 
    38 bool Name::isNegated() const {
    39     return mNegated;
    40 }
    41 
    42 void Name::setNegated(const bool is_negated) {
    43     mNegated = is_negated;
    44 }
    45 
    46 void Name::setType(const Type type) {
    47     mType = type;
    48 }
    49 
    50 Name::Type Name::getType() const {
    51     return mType;
    52 }
    53 
    54 
  • icGREP/icgrep-devel/icgrep/re/re_name.h

    r4187 r4194  
    33
    44#include "re_re.h"
    5 
    65#include <string>
    76
    8 class Name : public RE
    9 {
     7namespace re {
     8
     9class Name : public RE {
    1010public:
    11     typedef enum {FixedLength,Unicode,UnicodeCategory} Type;
    12     Name();
    13     Name(const Name * name);
    14     Name(std::string name);
     11    static inline bool classof(const RE * re) {
     12        return re->getClassTypeId() == ClassTypeId::Name;
     13    }
     14    static inline bool classof(const void *) {
     15        return false;
     16    }
     17    virtual RE * clone() const {
     18        return new Name(*this);
     19    }
     20    enum class Type {
     21        FixedLength
     22        ,Unicode
     23        ,UnicodeCategory
     24    };
    1525    void setName(std::string name);
    1626    std::string getName() const;
     
    1929    void setType(const Type type);
    2030    Type getType() const;
    21     ~Name();
     31    virtual ~Name() {}
     32protected:
     33    friend Name * makeName();
     34    friend Name * makeName(const Name *);
     35    friend Name * makeName(std::string, const bool, const Type);
     36    Name();
     37    Name(const Name & name);
     38    Name(std::string name, const bool negated, const Type type);
    2239private:
    2340    std::string mName;
     
    2643};
    2744
     // Default constructor: empty name, not negated, Type::FixedLength.
     45inline Name::Name()
     46: RE(ClassTypeId::Name)
     47, mName()
     48, mNegated(false)
     49, mType(Type::FixedLength) {
     50
     51}
     52
     // Constructs a Name from explicit values for all three fields.
     53inline Name::Name(std::string name, const bool negated, const Type type)
     54: RE(ClassTypeId::Name)
     55, mName(name)
     56, mNegated(negated)
     57, mType(type) {
     58
     59}
     60
     // Copy constructor: copies the name text, negation flag and type through the public getters.
     61inline Name::Name(const Name &name)
     62: RE(ClassTypeId::Name)
     63, mName(name.getName())
     64, mNegated(name.isNegated())
     65, mType(name.getType()) {
     66
     67}
     68
     // Replaces the stored name text.
     69inline void Name::setName(std::string name) {
     70    mName = name;
     71}
     72
     // Returns a copy of the stored name text.
     73inline std::string Name::getName() const {
     74    return mName;
     75}
     76
     // Returns the negation flag.
     77inline bool Name::isNegated() const {
     78    return mNegated;
     79}
     80
     // Sets the negation flag.
     81inline void Name::setNegated(const bool is_negated) {
     82    mNegated = is_negated;
     83}
     84
     // Sets the name's Type.
     85inline void Name::setType(const Type type) {
     86    mType = type;
     87}
     88
     // Returns the name's Type.
     89inline Name::Type Name::getType() const {
     90    return mType;
     91}
     92
     // Factory: Name built by the default constructor (empty name, not negated, Type::FixedLength).
     93inline Name * makeName() {
     94    return new Name();
     95}
     96
     // Factory: copy of *name made with the Name copy constructor; name is dereferenced without a null check.
     97inline Name * makeName(const Name * name) {
     98    return new Name(*name);
     99}
     100
     // Factory: Name with the given text; negated defaults to false and type to Name::Type::FixedLength.
     101inline Name * makeName(std::string name, const bool negated = false, const Name::Type type = Name::Type::FixedLength) {
     102    return new Name(name, negated, type);
     103}
     104
     105}
     106
    28107#endif // RE_NAME_H
  • icGREP/icgrep-devel/icgrep/re/re_nullable.cpp

    r4187 r4194  
    1515*/
    1616
     17namespace re {
     18
    1719RE * RE_Nullable::removeNullablePrefix(RE * re) {
    18     if (Seq * re_seq = dynamic_cast<Seq*>(re)) {
    19         re = removeNullableSeqPrefix(re_seq);
     20    if (Seq * seq = dyn_cast<Seq>(re)) {
     21        re = removeNullableSeqPrefix(seq);
    2022    }
    21     else if (Alt * re_alt = dynamic_cast<Alt*>(re)) {
    22         Alt * new_alt = new Alt();
    23         for (RE * re : *re_alt) {
    24             new_alt->push_back(removeNullablePrefix(re));
     23    else if (Alt * alt = dyn_cast<Alt>(re)) {
     24        for (auto i = alt->begin(); i != alt->end(); ++i) {
     25            *i = removeNullablePrefix(*i);
    2526        }
    26         re = new_alt;
     27        re = alt;
    2728    }
    28     else if (Rep * re_rep = dynamic_cast<Rep*>(re)) {
    29         if ((re_rep->getLB() == 0) || (isNullable(re_rep->getRE()))) {
    30             re = new Seq();
     29    else if (Rep * rep = dyn_cast<Rep>(re)) {
     30        if ((rep->getLB() == 0) || (isNullable(rep->getRE()))) {
     31            re = makeSeq();
    3132        }
    32         else if (hasNullablePrefix(re_rep->getRE())) {
    33             Vector seq;
    34             seq.push_back(removeNullablePrefix(re_rep->getRE()));
    35             seq.push_back(new Rep(re_rep->getRE(), re_rep->getLB() - 1, re_rep->getLB() - 1));
    36             re = RE_Simplifier::makeSeq(Seq::Normal, seq);
     33        else if (hasNullablePrefix(rep->getRE())) {
     34            Seq * seq = makeSeq();
     35            seq->push_back(removeNullablePrefix(rep->getRE()));
     36            seq->push_back(makeRep(rep->getRE(), rep->getLB() - 1, rep->getLB() - 1));
     37            re = RE_Simplifier::simplify(seq);
    3738        }
    3839        else {
    39             re = RE_Simplifier::makeRep(re_rep->getRE(), re_rep->getLB(), re_rep->getLB());
     40            re = RE_Simplifier::simplify(rep);
    4041        }
    4142    }
     
    4445
    4546inline Seq * RE_Nullable::removeNullableSeqPrefix(const Seq * seq) {
    46     Seq * new_seq = new Seq(seq->getType());
     47    Seq * new_seq = makeSeq(seq->getType());
    4748    if (!seq->empty()) {
    4849        auto i = seq->begin();
     
    6263
    6364RE * RE_Nullable::removeNullableSuffix(RE * re) {
    64     if (Seq * re_seq = dynamic_cast<Seq*>(re)) {
    65         re = removeNullableSeqSuffix(re_seq);
     65    if (Seq * seq = dyn_cast<Seq>(re)) {
     66        re = removeNullableSeqSuffix(seq);
    6667    }
    67     else if (Alt* re_alt = dynamic_cast<Alt*>(re)) {
    68         Alt* new_alt = new Alt();
    69         for (RE * re : *re_alt) {
    70             new_alt->push_back(removeNullableSuffix(re));
     68    else if (Alt* alt = dyn_cast<Alt>(re)) {
     69        for (auto i = alt->begin(); i != alt->end(); ++i) {
     70            *i = removeNullableSuffix(*i);
    7171        }
    72         re = new_alt;
    7372    }
    74     else if (Rep * re_rep = dynamic_cast<Rep*>(re)) {
    75         if ((re_rep->getLB() == 0) || (isNullable(re_rep->getRE()))) {
    76             re = new Seq();
     73    else if (Rep * rep = dyn_cast<Rep>(re)) {
     74        if ((rep->getLB() == 0) || (isNullable(rep->getRE()))) {
     75            delete rep;
     76            re = makeSeq();
    7777        }
    78         else if (hasNullableSuffix(re_rep->getRE())) {
    79             Vector seq;
    80             seq.push_back(RE_Simplifier::makeRep(re_rep->getRE(), re_rep->getLB() - 1, re_rep->getLB() - 1));
    81             seq.push_back(removeNullableSuffix(re_rep->getRE()));
    82             re = RE_Simplifier::makeSeq(Seq::Normal, seq);
     78        else if (hasNullableSuffix(rep->getRE())) {
     79            Seq * seq = makeSeq();
     80            seq->push_back(RE_Simplifier::simplify(makeRep(rep->getRE()->clone(), rep->getLB() - 1, rep->getLB() - 1)));
     81            seq->push_back(removeNullableSuffix(rep->getRE()));
     82            delete rep;
     83            re = RE_Simplifier::simplify(seq);
    8384        }
    8485        else {
    85             re = RE_Simplifier::makeRep(re_rep->getRE(), re_rep->getLB(), re_rep->getLB());
     86            re = RE_Simplifier::simplify(rep);
    8687        }
    8788    }
     
    9091
    9192inline Seq * RE_Nullable::removeNullableSeqSuffix(const Seq * seq) {
    92     Seq * new_seq = new Seq(seq->getType());
     93    Seq * new_seq = makeSeq(seq->getType());
    9394    if (!seq->empty()) {
    9495        auto i = seq->end();
     
    105106
    106107bool RE_Nullable::isNullable(const RE * re) {
    107     if (const Seq * re_seq = dynamic_cast<const Seq*>(re)) {
     108    if (const Seq * re_seq = dyn_cast<const Seq>(re)) {
    108109        return isNullableVector(re_seq);
    109110    }
    110     else if (const Alt* re_alt = dynamic_cast<const Alt*>(re)) {
     111    else if (const Alt* re_alt = dyn_cast<const Alt>(re)) {
    111112        return isNullableVector(re_alt);
    112113    }
    113     else if (const Rep* re_rep = dynamic_cast<const Rep*>(re)) {
     114    else if (const Rep* re_rep = dyn_cast<const Rep>(re)) {
    114115        return re_rep->getLB() == 0 ? true : isNullable(re_rep->getRE());
    115116    }
     
    128129bool RE_Nullable::hasNullablePrefix(const RE * re) {
    129130    bool nullable = false;
    130     if (const Seq * seq = dynamic_cast<const Seq*>(re)) {
     131    if (const Seq * seq = dyn_cast<const Seq>(re)) {
    131132        nullable = isNullable(seq->front()) ? true : hasNullablePrefix(seq->front());
    132133    }
    133     else if (const Alt * alt = dynamic_cast<const Alt*>(re)) {
     134    else if (const Alt * alt = dyn_cast<const Alt>(re)) {
    134135        if (!alt->empty()) {
    135136            nullable = true;
     
    142143        }
    143144    }
    144     else if (const Rep * rep = dynamic_cast<const Rep*>(re)) {
     145    else if (const Rep * rep = dyn_cast<const Rep>(re)) {
    145146        nullable = hasNullablePrefix(rep->getRE());
    146147    }
     
    150151bool RE_Nullable::hasNullableSuffix(const RE * re) {
    151152    bool nullable = false;
    152     if (const Seq * seq = dynamic_cast<const Seq*>(re)) {
     153    if (const Seq * seq = dyn_cast<const Seq>(re)) {
    153154        nullable = isNullable(seq->back()) ? true : hasNullableSuffix(seq->back());
    154155    }
    155     else if (const Alt * alt = dynamic_cast<const Alt*>(re)) {
     156    else if (const Alt * alt = dyn_cast<const Alt>(re)) {
    156157        if (!alt->empty()) {
    157158            nullable = true;
     
    164165        }
    165166    }
    166     else if (const Rep * rep = dynamic_cast<const Rep*>(re)) {
     167    else if (const Rep * rep = dyn_cast<const Rep>(re)) {
    167168        nullable = hasNullableSuffix(rep->getRE());
    168169    }
     
    170171}
    171172
    172 
    173 
     173}
  • icGREP/icgrep-devel/icgrep/re/re_nullable.h

    r4187 r4194  
    77#include <vector>
    88
     9namespace re {
     10
    911class RE_Nullable {
    10     typedef RE::Vector Vector;
    1112public:
    1213    static RE* removeNullablePrefix(RE* re);
     
    2122};
    2223
     24}
     25
    2326#endif // RE_NULLABLE_H
  • icGREP/icgrep-devel/icgrep/re/re_parser.cpp

    r4193 r4194  
    1414#include <algorithm>
    1515
     16namespace re {
     17
    1618RE * RE_Parser::parse_re(const std::string & regular_expression, const bool allow_escapes_within_charset) {
    1719    RE_Parser parser(regular_expression, allow_escapes_within_charset);
     
    4547
    4648RE * RE_Parser::parse_alt(const bool subexpression) {
    47     std::unique_ptr<Alt> alt(new Alt());
     49    std::unique_ptr<Alt> alt(makeAlt());
    4850    for (;;) {
    4951        alt->push_back(parse_seq());
     
    7072
    7173inline RE * RE_Parser::parse_seq() {
    72     std::unique_ptr<Seq> seq(new Seq());
     74    std::unique_ptr<Seq> seq(makeSeq());
    7375    for (;;) {
    7476        RE * re = parse_next_token();
     
    9597            case '^':
    9698                ++_cursor;
    97                 re = new Start;
     99                re = makeStart();
    98100                break;
    99101            case '$':
    100102                ++_cursor;
    101                 re = new End;
     103                re = makeEnd();
    102104                break;
    103105            case '|': case ')':
     
    120122
    121123CC * RE_Parser::parse_any_character() {
    122     CC * cc = new CC();
     124    CC * cc = makeCC();
    123125    cc->insert_range(0, 9);
    124     cc->insert_range(11, 0x10FFFF);
     126    cc->insert_range(11, CC::UNICODE_MAX);
    125127    ++_cursor;
    126128    return cc;
     
    134136        case '*':
    135137            ++_cursor; // skip past the '*'
    136             re = new Rep(re, 0, Rep::UNBOUNDED_REP);
     138            re = makeRep(re, 0, Rep::UNBOUNDED_REP);
    137139            break;
    138140        case '?':
    139141            ++_cursor; // skip past the '?'
    140             re = new Rep(re, 0, 1);
     142            re = makeRep(re, 0, 1);
    141143            break;
    142144        case '+':
    143145            ++_cursor; // skip past the '+'
    144             re = new Rep(re, 1, Rep::UNBOUNDED_REP);
     146            re = makeRep(re, 1, Rep::UNBOUNDED_REP);
    145147            break;
    146148        case '{':
     
    168170    throw_incomplete_expression_error_if_end_of_stream();
    169171    if (*_cursor == '}') {
    170         rep = new Rep(re, lower_bound, lower_bound);
     172        rep = makeRep(re, lower_bound, lower_bound);
    171173    }
    172174    else if (*_cursor != ',') {
     
    177179        throw_incomplete_expression_error_if_end_of_stream();
    178180        if (*_cursor == '}') {
    179             rep = new Rep(re, lower_bound, Rep::UNBOUNDED_REP);
     181            rep = makeRep(re, lower_bound, Rep::UNBOUNDED_REP);
    180182        }
    181183        else {
     
    184186                throw BadUpperBound();
    185187            }
    186             rep = new Rep(re, lower_bound, upper_bound);
     188            rep = makeRep(re, lower_bound, upper_bound);
    187189        }
    188190    }
     
    197199    }
    198200    else {
    199         return new CC(parse_utf8_codepoint());
     201        return makeCC(parse_utf8_codepoint());
    200202    }
    201203}
     
    209211        case '.': case '?': case '[': case '\\':
    210212        case ']': case '{': case '|': case '}':
    211             return new CC(*_cursor++);
     213            return makeCC(*_cursor++);
    212214        case 'u':
    213             return new CC(parse_hex());
     215            return makeCC(parse_hex());
    214216        case 'P':
    215217            negated = true;
     
    245247                }
    246248                c = (c << 6) | static_cast<unsigned>(*_cursor & 0x3F);
    247                 // It is an error if a 3-byte sequence is used to encode a codepoint < 0x800
    248                 // or a 4-byte sequence is used to encode a codepoint < 0x10000.
    249                 // if (((bytes == 1) && (c < 0x20)) || ((bytes == 2) && (c < 0x10))) {
    250                 if ((c << (bytes - 1)) < 0x20) {
     249                // It is an error if a 3-byte sequence is used to encode a codepoint < 0x800
     250                // or a 4-byte sequence is used to encode a codepoint < 0x10000.
     251                // if (((bytes == 1) && (c < 0x20)) || ((bytes == 2) && (c < 0x10))) {
     252                if ((c << (bytes - 1)) < 0x20) {
    251253                    throw InvalidUTF8Encoding();
    252254                }
    253                  
    254255            }
    255256        }
     
    257258    // It is an error if a 4-byte sequence is used to encode a codepoint
    258259    // above the Unicode maximum.   
    259     if (c > 0x10FFFF) throw InvalidUTF8Encoding();
     260    if (c > CC::UNICODE_MAX) {
     261        throw InvalidUTF8Encoding();
     262    }
    260263    return c;
    261264}
     
    263266inline Name * RE_Parser::parse_unicode_category(const bool negated) {
    264267    if (++_cursor != _end && *_cursor == '{') {
    265         std::unique_ptr<Name> name = std::unique_ptr<Name>(new Name);
    266         name->setType(Name::UnicodeCategory);
    267         name->setNegated(negated);
    268268        const cursor_t start = _cursor + 1;
    269269        for (;;) {
     270            if (++_cursor == _end) {
     271                throw UnclosedUnicodeCharacterClass();
     272            }
     273            if (*_cursor == '}') {
     274                break;
     275            }
    270276            ++_cursor;
    271             if (_cursor == _end) {
    272                 throw UnclosedUnicodeCharacterClass();
    273             }
    274             if (*_cursor == '}') {
    275                 break;
    276             }
    277             ++_cursor;
    278         }
    279         name->setName(std::string(start, _cursor));
    280         ++_cursor;
    281         return name.release();
     277        }
     278        return makeName(std::string(start, _cursor++), negated, Name::Type::UnicodeCategory);
    282279    }
    283280    throw ParseFailure("Incorrect Unicode character class format!");
     
    285282
    286283RE * RE_Parser::parse_charset() {
    287     std::unique_ptr<CC> cc(new CC());
     284    std::unique_ptr<CC> cc(makeCC());
    288285    bool negated = false;
    289     bool included_closing_square_bracket = false;
    290286    cursor_t start = ++_cursor;
    291287    while (_cursor != _end) {
     
    306302                    cc->insert(']');
    307303                    ++_cursor;
    308                     included_closing_square_bracket = true;
    309304                    literal = false;
    310305                    break;
     
    322317                    const cursor_t next = _cursor + 1;
    323318                    if (next == _end) {
    324                         goto parse_failed;
     319                        throw UnclosedCharacterClass();
    325320                    }
    326321                    if ((start == _cursor) ? (*next != '-') : (*next == ']')) {
     
    357352        }
    358353    }
    359 parse_failed:
    360     if (included_closing_square_bracket) {
    361         throw ParseFailure("One ']' cannot close \"[]\" or \"[^]\"; use \"[]]\" or \"[^]]\" instead.");
    362     }
    363     else {
    364         throw UnclosedCharacterClass();
    365     }
     354    throw UnclosedCharacterClass();
    366355}
    367356
     
    441430    if (_cursor == _end) throw IncompleteRegularExpression();
    442431}
     432
     433}
  • icGREP/icgrep-devel/icgrep/re/re_parser.h

    r4187 r4194  
    1515#include <list>
    1616#include <memory>
     17
     18namespace re {
    1719
    1820class RE_Parser
     
    6769};
    6870
     71}
     72
    6973#endif // RE_PARSER_H
  • icGREP/icgrep-devel/icgrep/re/re_re.cpp

    r4187 r4194  
    77#include "re_re.h"
    88
    9 RE::RE(){};
    10 RE::~RE(){};
     9namespace re {
    1110
     11RE::~RE() {
    1212
     13}
    1314
    14 
    15 
     15}
  • icGREP/icgrep-devel/icgrep/re/re_re.h

    r4187 r4194  
    99
    1010#include <vector>
     11#include <assert.h>
    1112
    12 class RE
    13 {
     13namespace re {
     14
     15class Vector;
     16class Pair;
     17
     18class Alt;
     19class CC;
     20class Diff;
     21class End;
     22class Intersect;
     23class Name;
     24class Permute;
     25class Rep;
     26class Seq;
     27class Start;
     28class SymDiff;
     29class Union;
     30
     31class RE {
    1432public:
    15     typedef std::vector<RE*>            Vector;
    16     virtual ~RE();
     33    enum class ClassTypeId : unsigned {
     34        Alt
     35        , CC
     36        , Diff
     37        , End
     38        , Intersect
     39        , Name
     40        , Permute
     41        , Rep
     42        , Seq
     43        , Start
     44        , SymDiff
     45        , Union
     46    };
     47    ClassTypeId getClassTypeId() const {
     48        return mClassTypeId;
     49    }
     50    virtual RE * clone() const = 0;
     51    virtual ~RE() = 0;
    1752protected:
    18     RE();
     53    inline RE(const ClassTypeId id)
     54    : mClassTypeId(id) {
     55
     56    }
     57    const ClassTypeId mClassTypeId;
    1958};
     59
     60template <typename To, typename From>
     61inline static bool isa(const From * object) {
     62    return To::classof(object);
     63}
     64
     65template <typename To, typename From>
     66inline static To * dyn_cast(From * object) {
     67    if (isa<To, From>(object)) {
     68        return reinterpret_cast<To *>(object);
     69    }
     70    return nullptr;
     71}
     72
     73class Vector : public RE, public std::vector<RE*> {
     74public:
     75    virtual ~Vector() {
     76        for (RE * re : *this) {
     77            delete re;
     78        }
     79    }
     80protected:
     81    inline Vector(const ClassTypeId id)
     82    : RE(id)
     83    , std::vector<RE*>()
     84    {
     85
     86    }
     87    inline Vector(const ClassTypeId id, const iterator begin, const iterator end)
     88    : RE(id)
     89    , std::vector<RE*>(begin, end) {
     90
     91    }
     92    inline Vector(const ClassTypeId id, const const_iterator begin, const const_iterator end, const bool deep_copy)
     93    : RE(id) {
     94        assert (deep_copy && "Not intended as a shallow copy constructor.");
     95        this->resize(std::distance(begin, end));
     96        for (auto i = begin; i != end; ++i) {
     97            this->assign(std::distance(begin, i), (*i)->clone());
     98        }
     99    }
     100};
     101
     102//class Pair : public RE {
     103//protected:
     104//    inline Pair(const ClassTypeId id)
     105//    : RE(id)
     106//    , _lh(nullptr)
     107//    , _rh(nullptr)
     108//    {
     109
     110//    }
     111//    inline Pair(const ClassTypeId id, const RE * lh, const RE * rh)
     112//    : RE(id)
     113//    , _lh(lh)
     114//    , _rh(rh)
     115//    {
     116
     117//    }
     118//    virtual ~Pair() {
     119//        delete _lh;
     120//        delete _rh;
     121//    }
     122//protected:
     123//    const RE * _lh;
     124//    const RE * _rh;
     125//};
     126
     127//static Diff * makeDiff(const RE * lh, const RE * rh);
     128
     129//static Intersect * makeIntersect(const RE * lh, const RE * rh);
     130
     131//static Permute * makePermute();
     132//static Permute * makePermute(Vector::iterator begin, Vector::iterator end);
     133
     134//static SymDiff * makeSymDiff(const RE * lh, const RE * rh);
     135
     136//static Union * makeUnion(const RE * lh, const RE * rh);
     137
     138}
    20139
    21140#endif // RE_H
  • icGREP/icgrep-devel/icgrep/re/re_reducer.cpp

    r4187 r4194  
    11#include "re_reducer.h"
     2#include "re_cc.h"
     3#include "re_name.h"
     4#include "re_start.h"
     5#include "re_end.h"
     6#include "re_seq.h"
     7#include "re_alt.h"
     8#include "re_rep.h"
    29#include <assert.h>
    310
    4 RE* RE_Reducer::reduce(RE* re, std::map<std::string, RE*>& re_map) {
    5     RE* retVal = nullptr;
     11namespace re {
     12
     13RE * RE_Reducer::reduce(RE * re, std::map<std::string, RE*>& re_map) {
     14    RE * retVal = re;
    615    assert (re);
    7     if (Alt* re_alt = dynamic_cast<Alt*>(re)) {
    8         Alt * new_alt = new Alt();
    9         for (RE * re : *re_alt) {
    10             new_alt->push_back(reduce(re, re_map));
    11         }
    12         retVal = new_alt;
    13     }
    14     else if (Seq* re_seq = dynamic_cast<Seq*>(re)) {
    15         Seq * new_seq = new Seq();
    16         for (RE * re : *re_seq) {
    17             new_seq->push_back(reduce(re, re_map));
    18         }
    19         if (re_seq->getType() == Seq::Byte) {
    20             //If this is a sequence of byte classes then this is a multibyte sequence for a Unicode character class.
    21             new_seq->setType(Seq::Byte);
    22             std::string seqname = new_seq->getName();
    23             re_map.insert(make_pair(seqname, new_seq));
    24             Name* name = new Name(seqname);
    25             name->setType(Name::Unicode);
    26             retVal = name;
    27         }
    28         else {
    29             retVal = new_seq;
     16    if (Alt * alt = dyn_cast<Alt>(re)) {
     17        for (auto i = alt->begin(); i != alt->end(); ++i) {
     18            *i = reduce(*i, re_map);
    3019        }
    3120    }
    32     else if (Rep* re_rep = dynamic_cast<Rep*>(re)) {
    33         retVal = new Rep(reduce(re_rep->getRE(), re_map), re_rep->getLB(), re_rep->getUB());
     21    else if (Seq * seq = dyn_cast<Seq>(re)) {
     22        for (auto i = seq->begin(); i != seq->end(); ++i) {
     23            *i = reduce(*i, re_map);
     24        }
     25        if (seq->getType() == Seq::Type::Byte) {
     26            //If this is a sequence of byte classes then this is a multibyte sequence for a Unicode character class.
     27            std::string seqname = seq->getName();
     28            re_map.insert(make_pair(seqname, seq));
     29            retVal = makeName(seqname, false, Name::Type::Unicode);
     30        }
    3431    }
    35     else if (CC* re_cc = dynamic_cast<CC*>(re)) {
    36         std::string ccname = re_cc->getName();
     32    else if (Rep * rep = dyn_cast<Rep>(re)) {
     33        rep->setRE(reduce(rep->getRE(), re_map));
     34    }
     35    else if (CC * cc = dyn_cast<CC>(re)) {
     36        std::string ccname = cc->getName();
    3737        //If the character class isn't in the map then add it.
    38         re_map.insert(make_pair(ccname, re_cc));
     38        re_map.insert(make_pair(ccname, cc));
    3939        //return a new name class with the name of the character class.
    40         retVal = new Name(ccname);
    41     }
    42     else if (Name* re_name = dynamic_cast<Name*>(re)) {
    43         retVal = new Name(re_name);
    44     }
    45     else if (dynamic_cast<Start*>(re)) {
    46         retVal = new Start();
    47     }
    48     else if (dynamic_cast<End*>(re)) {
    49         retVal = new End();
     40        retVal = makeName(ccname);
    5041    }
    5142    return retVal;
    5243}
     44
     45}
  • icGREP/icgrep-devel/icgrep/re/re_reducer.h

    r4187 r4194  
    44//Regular Expressions
    55#include "re_re.h"
    6 #include "re_cc.h"
    7 #include "re_name.h"
    8 #include "re_start.h"
    9 #include "re_end.h"
    10 #include "re_seq.h"
    11 #include "re_alt.h"
    12 #include "re_rep.h"
    13 
    146#include <algorithm>
    157#include <list>
    168#include <map>
     9
     10namespace re {
    1711
    1812class RE_Reducer
     
    2216};
    2317
     18}
     19
    2420#endif // RE_REDUCER_H
  • icGREP/icgrep-devel/icgrep/re/re_rep.h

    r4187 r4194  
    1010#include "re_re.h"
    1111
     12namespace re {
     13
    1214class Rep : public RE {
    1315public:
    14 
     16    static inline bool classof(const RE * re) {
     17        return re->getClassTypeId() == ClassTypeId::Rep;
     18    }
     19    static inline bool classof(const void *) {
     20        return false;
     21    }
     22    virtual RE * clone() const {
     23        return new Rep(*this);
     24    }
    1525    enum { UNBOUNDED_REP = -1 };
    16 
    17     Rep(RE* re, int lb, int ub);
    18     ~Rep();
    1926    RE * getRE() const;
    2027    void setRE(RE * re = nullptr);
    2128    int getLB() const;
    22     void setLB(int lb);
     29    void setLB(const int lb);
    2330    int getUB() const;
    24     void setUB(int ub);
     31    void setUB(const int ub);
     32    virtual ~Rep();
     33protected:
     34    friend Rep * makeRep(RE *, const int, const int);
     35    Rep(RE * re, const int lb, const int ub);
     36    Rep(const Rep & rep);
    2537private:
    2638    RE* mRE;
     
    2941};
    3042
    31 inline Rep::Rep(RE * re, int lb, int ub)
    32 : mRE(re)
     43inline Rep::Rep(RE * re, const int lb, const int ub)
     44: RE(ClassTypeId::Rep)
     45, mRE(re)
    3346, mLB(lb)
    3447, mUB(ub)
     48{
     49
     50}
     51
     52inline Rep::Rep(const Rep & rep)
     53: RE(ClassTypeId::Rep)
     54, mRE(rep.getRE()->clone())
     55, mLB(rep.getLB())
     56, mUB(rep.getUB())
    3557{
    3658
     
    5375}
    5476
    55 inline void Rep::setLB(int lb) {
     77inline void Rep::setLB(const int lb) {
    5678    mLB = lb;
    5779}
     
    6183}
    6284
    63 inline void Rep::setUB(int ub) {
     85inline void Rep::setUB(const int ub) {
    6486    mUB = ub;
    6587}
    6688
     89inline Rep * makeRep(RE * re, const int lower_bound, const int upper_bound) {
     90    return new Rep(re, lower_bound, upper_bound);
     91}
     92
     93}
     94
    6795#endif
  • icGREP/icgrep-devel/icgrep/re/re_seq.cpp

    r4187 r4194  
    66
    77#include "re_seq.h"
     8#include "re_cc.h"
     9#include "re_name.h"
    810
    9 
    10 Seq::Seq()
    11 : mType(Seq::Normal)
    12 {
    13 
    14 }
    15 
    16 Seq::Seq(const Type type)
    17 : mType(type)
    18 {
    19 
    20 }
    21 
    22 Seq::Seq(const Type type, iterator begin, iterator end)
    23 : std::vector<RE*>(begin, end)
    24 , mType(type)
    25 {
    26 
    27 }
    28 
    29 Seq::~Seq() {
    30     for (RE * re : *this) {
    31         delete re;
    32     }
    33 }
     11namespace re {
    3412
    3513std::string Seq::getName() const {
    36     if (mType == Seq::Byte) {
     14    if (mType == Seq::Type::Byte) {
    3715        std::string name = "Seq";
    38         for (RE * re : *this) {
    39             if (CC* seq_cc = dynamic_cast<CC*>(re)) {
     16        for (const RE * re : *this) {
     17            if (const CC* seq_cc = dyn_cast<const CC>(re)) {
    4018                name += seq_cc->getName();
    4119            }
    42             else if (Name* seq_name = dynamic_cast<Name*>(re)) {
     20            else if (const Name* seq_name = dyn_cast<const Name>(re)) {
    4321                name += seq_name->getName();
    4422            }
     
    5432}
    5533
    56 Seq::Type Seq::getType() const {
    57     return mType;
    5834}
    59 
    60 void Seq::setType(Seq::Type type) {
    61     mType = type;
    62 }
  • icGREP/icgrep-devel/icgrep/re/re_seq.h

    r4187 r4194  
    55 */
    66
    7 #ifndef JOIN_H
    8 #define JOIN_H
     7#ifndef RE_SEQ_H
     8#define RE_SEQ_H
    99
    1010#include "re_re.h"
    11 #include "re_cc.h"
    12 #include "re_name.h"
    13 #include <list>
    14 #include <sstream>
    15 #include <utility>
     11#include <string>
    1612
    17 class Seq : public RE, public RE::Vector {
     13namespace re {
     14
     15class Seq : public Vector {
    1816public:
    19     typedef RE::Vector Vector;
    20     typedef enum {
    21         Normal,
    22         Byte
    23     } Type;
    24     Seq();
    25     Seq(const Type type);
    26     Seq(const Type type, iterator begin, iterator end);
    27     virtual ~Seq();
     17    static inline bool classof(const RE * re) {
     18        return re->getClassTypeId() == ClassTypeId::Seq;
     19    }
     20    static inline bool classof(const void *) {
     21        return false;
     22    }
     23    virtual RE * clone() const {
     24        return new Seq(*this);
     25    }
     26    enum class Type {
     27        Normal
     28        , Byte
     29    };
    2830    std::string getName() const;
    29     Type getType() const;
    30     void setType(Type type);
     31    inline Type getType() const {
     32        return mType;
     33    }
     34    inline void setType(const Type type) {
     35        mType = type;
     36    }
     37    virtual ~Seq() {}
     38protected:
     39    friend Seq * makeSeq(const Seq::Type);
     40    friend Seq * makeSeq(const Seq::Type, Seq::iterator, Seq::iterator);
     41    Seq(const Type type)
     42    : Vector(ClassTypeId::Seq)
     43    , mType(type) {
     44
     45    }
     46    Seq(const Seq & seq)
     47    : Vector(ClassTypeId::Seq, seq.cbegin(), seq.cend(), true)
     48    , mType(seq.mType) {
     49
     50    }
     51    Seq(const Type type, iterator begin, iterator end)
     52    : Vector(ClassTypeId::Seq, begin, end)
     53    , mType(type)
     54    {
     55
     56    }
    3157private:
    3258    Type    mType;
    3359};
     60
     61inline Seq * makeSeq(const Seq::Type type = Seq::Type::Normal) {
     62    return new Seq(type);
     63}
     64
     65inline Seq * makeSeq(const Seq::Type type, Seq::iterator begin, Seq::iterator end) {
     66    return new Seq(type, begin, end);
     67}
     68
     69}
    3470
    3571#endif // RE_SEQ_H
  • icGREP/icgrep-devel/icgrep/re/re_simplifier.cpp

    r4187 r4194  
    1111#include <queue>
    1212
    13 RE* RE_Simplifier::simplify(RE * re) {
    14     RE * retVal = re;
    15     if (Alt * re_alt = dynamic_cast<Alt*>(re)) {
    16         Vector simplified_alt;
    17         for (RE * re : *re_alt) {
    18             simplified_alt.push_back(simplify(re));
     13namespace re {
     14
     15RE * RE_Simplifier::simplify(RE * re) {
     16    if (Alt * alt = dyn_cast<Alt>(re)) {
     17        for (auto i = alt->begin(); i != alt->end(); ++i) {
     18            *i = simplify(*i);
    1919        }
    20         retVal = makeAlt(simplified_alt);
     20        re = simplify(alt);
    2121    }
    22     else if (Seq * re_seq = dynamic_cast<Seq*>(re)) {
    23         Vector simplified_seq;
    24         for (RE * re : *re_seq)
    25         {
    26             simplified_seq.push_back(simplify(re));
     22    else if (Seq * seq = dyn_cast<Seq>(re)) {
     23        for (auto i = seq->begin(); i != seq->end(); ++i) {
     24            *i = simplify(*i);
    2725        }
    28         retVal = makeSeq(re_seq->getType(), simplified_seq);
     26        re = simplify(seq);
    2927    }
    30     else if (CC* re_cc = dynamic_cast<CC*>(re)) {
    31         retVal = re_cc;
     28    else if (Rep * rep = dyn_cast<Rep>(re)) {
     29        rep->setRE(simplify(rep->getRE()));
     30        simplify(rep);
    3231    }
    33     else if (Name* re_name = dynamic_cast<Name*>(re)) {
    34         retVal = new Name(re_name);
    35     }
    36     else if (Rep* re_rep = dynamic_cast<Rep*>(re)) {
    37         retVal = makeRep(simplify(re_rep->getRE()), re_rep->getLB(), re_rep->getUB());
    38     }
    39     else if (dynamic_cast<Start*>(re)) {
    40         retVal = new Start();
    41     }
    42     else if (dynamic_cast<End*>(re)) {
    43         retVal = new End();
    44     }
    45     return retVal;
     32    return re;
    4633}
    4734
    48 RE * RE_Simplifier::makeSeq(const Seq::Type type, Vector & list) {
     35RE * RE_Simplifier::simplify(Seq * seq) {
    4936    /*
    5037      mkSeq - make a sequence, but flatten.  Result might not be a Seq. If
     
    5239    */
    5340
    54     RE * re = nullptr;
    55     if (!list.empty()) {
    56         std::unique_ptr<Seq> seq = std::unique_ptr<Seq>(new Seq(type));
     41    RE * re = seq;
     42    if (!seq->empty()) {
     43        std::vector<RE*> list;
     44        list.reserve(seq->size());
    5745        // Reverse the order of the input list so we can more efficiently "pull" the first
    58         // character from the end. Note: this ought to be an inplace reversal.
    59         std::reverse(list.begin(), list.end());
     46        // character from the end. Note: this uses a linear inplace reversal.
     47        std::reverse(seq->begin(), seq->end());
    6048
    61         while (!list.empty()) {
    62             RE * next = list.back();
    63             list.pop_back();
    64             if (Seq * re_seq = dynamic_cast<Seq*>(next)) {
    65                 if (re_seq->getType() != Seq::Byte) {
    66                     // like above, insert the "subsequence" in reverse order
    67                     list.reserve(re_seq->size());
    68                     std::reverse_copy(re_seq->begin(), re_seq->end(), std::back_inserter(list));
     49        while (!seq->empty()) {
     50            RE * next = seq->back();
     51            seq->pop_back();
     52            if (Seq * re_seq = dyn_cast<Seq>(next)) {
     53                if (re_seq->getType() != Seq::Type::Byte) {
     54                    // like above, insert the "subsequence" to flatten in reverse order
     55                    std::reverse_copy(re_seq->begin(), re_seq->end(), std::back_inserter(*seq));
     56                    re_seq->clear();
     57                    delete re_seq;
    6958                    continue;
    7059                }
    7160            }
    72             seq->push_back(next);
     61            list.push_back(next);
    7362        }
    74         if (seq->size() == 1) {
    75             re = seq->back();
    76             seq->pop_back();
     63        if (list.size() == 1) {
     64            re = list.back();
     65            delete seq;
    7766        }
    7867        else {
    79             re = seq.release();
     68            seq->swap(list);
    8069        }
    8170    }
     
    9281 * @return simplified RE representing the Alt
    9382 */
    94 RE * RE_Simplifier::makeAlt(Vector & list) {
    95     RE * re = nullptr;
    96     if (!list.empty()) {
     83RE * RE_Simplifier::simplify(Alt * alt) {
     84    RE * re = alt;
     85    if (!alt->empty()) {
    9786
    98         std::unique_ptr<Alt> new_alt = std::unique_ptr<Alt>(new Alt());
    9987        std::queue<CC*> ccs;
    10088
    101         while (!list.empty()) {
    102             RE * next = list.back();
    103             list.pop_back();
    104             if (Alt * re_alt = dynamic_cast<Alt*>(next)) {
    105                 list.insert(list.end(), re_alt->begin(), re_alt->end());
     89        std::vector<RE *> list;
     90        while (!alt->empty()) {
     91            RE * next = alt->back();
     92            alt->pop_back();
     93            if (Alt * re_alt = dyn_cast<Alt>(next)) {
     94                alt->insert(alt->end(), re_alt->begin(), re_alt->end());
     95                re_alt->clear();
     96                delete re_alt;
    10697            }
    107             else if (CC * cc = dynamic_cast<CC*>(next)) {
     98            else if (CC * cc = dyn_cast<CC>(next)) {
    10899                ccs.push(cc);
    109100            }
    110101            else {
    111                 new_alt->push_back(next);
     102                list.push_back(next);
    112103            }
    113104        }
     
    117108                CC * a = ccs.front(); ccs.pop();
    118109                CC * b = ccs.front(); ccs.pop();
    119                 ccs.push(new CC(a, b));
     110                ccs.push(makeCC(a, b));
    120111            }
    121             new_alt->push_back(ccs.front());
     112            list.push_back(ccs.front());
    122113        }
    123114
    124         if (new_alt->size() == 1) {
     115        if (list.size() == 1) {
    125116            // if only one alternation exists, discard the Alt object itself and return the internal RE.
    126             re = new_alt->back();
    127             new_alt->pop_back();
     117            re = list.back();
     118            delete alt;
    128119        }
    129120        else {
    130             re = cse(new_alt.release());
     121            alt->swap(list);
    131122        }
    132123    }
     
    134125}
    135126
    136 inline RE * RE_Simplifier::cse(Alt * alt) {
    137 
    138 
    139 
    140 
    141     return alt;
    142 }
    143 
    144 
    145 RE * RE_Simplifier::makeRep(RE * re, const int lb, const int ub)
    146 {
    147     if (Rep* rep = dynamic_cast<Rep*>(re)) {
    148         if (((rep->getUB() == Rep::UNBOUNDED_REP) && (lb > 0)) ||
    149                 ((rep->getUB() == Rep::UNBOUNDED_REP) && (rep->getLB() <= 1))) {
    150             return new Rep(rep->getRE(), rep->getLB() * lb, Rep::UNBOUNDED_REP);
     127RE * RE_Simplifier::simplify(Rep * rep) {
     128    RE * re = rep->getRE();
     129    const int lb = rep->getLB();
     130    const int ub = rep->getUB();
     131    std::unique_ptr<Rep> janitor(rep);
     132    rep->setRE(nullptr);
     133    if (Rep * nrep = dyn_cast<Rep>(re)) {
     134        if (nrep->getUB() == Rep::UNBOUNDED_REP) {
     135            if ((lb > 0) || (nrep->getLB() <= 1)) {
     136                nrep->setLB(nrep->getLB() * lb);
     137                nrep->setUB(Rep::UNBOUNDED_REP);
     138                return simplify(nrep);
     139            }
     140            else if (lb == 0) {
     141                nrep->setLB(0);
     142                nrep->setUB(1);
     143                return simplify(nrep);
     144            }
    151145        }
    152         else if ((rep->getUB() == Rep::UNBOUNDED_REP) && (lb == 0)) {
    153             return new Rep(rep, 0, 1);
    154         }
    155         else if ((rep->getUB() * lb) >= (rep->getLB() * (lb + 1) - 1)) {
    156             return new Rep(rep->getRE(), rep->getLB() * lb, ubCombine(rep->getUB(), ub));
    157         }
    158         else {
    159             return new Rep(rep, lb, ub);
     146        else if ((nrep->getUB() * lb) >= (nrep->getLB() * (lb + 1) - 1)) {
     147            nrep->setLB(nrep->getUB() * lb);
     148            nrep->setUB(ubCombine(nrep->getUB(), ub));
     149            return simplify(nrep);
    160150        }
    161151    }
    162152    else {
    163         if (Seq * seq = dynamic_cast<Seq*>(re)) {
     153        if (Seq * seq = dyn_cast<Seq>(re)) {
    164154            if (seq->empty()) {
    165155                return seq;
    166156            }
    167157        }
    168 
    169158        if ((lb == 0) && (ub == 0)) {
    170             return new Seq();
     159            delete re;
     160            return makeSeq();
    171161        }
    172162        else if ((lb == 1) && (ub == 1)) {
    173163            return re;
    174164        }
    175         else {
    176             return new Rep(re, lb, ub);
    177         }
    178165    }
     166    rep->setRE(re);
     167    return janitor.release();
    179168}
    180169
     
    187176    }
    188177}
     178
     179}
  • icGREP/icgrep-devel/icgrep/re/re_simplifier.h

    r4187 r4194  
    66#include <list>
    77
     8namespace re {
     9
    810class Alt;
    911
    1012class RE_Simplifier {
    11     typedef RE::Vector Vector;
    1213public:
    13     static RE * makeAlt(Vector & list);
    14     static RE * cse(Alt * alt);
    15     static RE * makeSeq(const Seq::Type type, Vector & list);
    16     static RE * makeRep(RE * re, const int lb, const int ub);
    17     static RE * simplify(RE* re);
     14    static RE * simplify(Alt * alt);
     15    static RE * simplify(Seq * seq);
     16    static RE * simplify(Rep * rep);
     17    static RE * simplify(RE * re);
    1818private:
    1919    static int ubCombine(const int h1, const int h2);
    2020};
    2121
     22}
     23
    2224#endif // RE_SIMPLIFIER_H
  • icGREP/icgrep-devel/icgrep/re/re_start.cpp

    r4187 r4194  
    66
    77#include "re_start.h"
    8 
    9 Start::Start(){}
    10 Start::~Start(){}
  • icGREP/icgrep-devel/icgrep/re/re_start.h

    r4187 r4194  
    1010#include "re_re.h"
    1111
    12 class Start : public RE
    13 {
     12#include "re_re.h"
     13
     14namespace re {
     15
     16class Start : public RE {
    1417public:
    15     Start();
    16     ~Start();
     18    static inline bool classof(const RE * re) {
     19        return re->getClassTypeId() == ClassTypeId::Start;
     20    }
     21    static inline bool classof(const void *) {
     22        return false;
     23    }
     24    virtual RE * clone() const {
     25        return new Start();
     26    }
     27protected:
     28    friend Start * makeStart();
     29    Start() : RE(ClassTypeId::Start) {}
     30    virtual ~Start() {}
    1731};
     32
     33inline Start * makeStart() {
     34    return new Start();
     35}
     36
     37}
    1838
    1939#endif // START_H
  • icGREP/icgrep-devel/icgrep/utf8_encoder.cpp

    r4187 r4194  
    1818#include <stdexcept>
    1919
    20 RE* UTF8_Encoder::toUTF8(RE* re) {
    21 
    22     RE* retVal = nullptr;
    23     if (Alt* re_alt = dynamic_cast<Alt*>(re)) {
    24         Alt * new_alt = new Alt();
    25         for (RE * re : *re_alt) {
    26             new_alt->push_back(toUTF8(re));
    27         }
    28         retVal = new_alt;
    29     }
    30     else if (Seq * re_seq = dynamic_cast<Seq*>(re)) {
     20using namespace re;
     21
     22RE * UTF8_Encoder::toUTF8(RE* re) {
     23    if (Alt * alt = dyn_cast<Alt>(re)) {
     24        for (auto i = alt->begin(); i != alt->end(); ++i) {
     25            *i = toUTF8(*i);
     26        }
     27    }
     28    else if (Seq * seq = dyn_cast<Seq>(re)) {
    3129        //If this is a previously encoded Unicode byte sequence.
    32         if (re_seq->getType() == Seq::Byte) {
     30        if (seq->getType() == Seq::Type::Byte) {
    3331            throw std::runtime_error("Unexpected UTF Byte Sequence given to UTF8 Encoder.");
    3432        }
    35         Seq * new_seq = new Seq(Seq::Normal);
    36         for (RE * re : *re_seq) {
    37             new_seq->push_back(toUTF8(re));
    38         }
    39         retVal = new_seq;
    40     }
    41     else if (Rep* re_rep = dynamic_cast<Rep*>(re))
    42     {
    43         retVal = new Rep(toUTF8(re_rep->getRE()), re_rep->getLB(), re_rep->getUB());
    44     }
    45     else if (CC* re_cc = dynamic_cast<CC*>(re))
    46     { 
    47         if (re_cc->size() == 1)
    48         {
    49             retVal = rangeToUTF8(re_cc->front());
    50         }
    51         else if (re_cc->size() > 1) {
    52             RE::Vector re_list;
    53             for (const CharSetItem & item : *re_cc) {
    54                 re_list.push_back(rangeToUTF8(item));
     33        for (auto i = seq->begin(); i != seq->end(); ++i) {
     34            *i = toUTF8(*i);
     35        }
     36    }
     37    else if (CC * cc = dyn_cast<CC>(re)) {
     38        if (cc->size() == 1) {
     39            re = rangeToUTF8(cc->front());
     40            delete cc;
     41        }
     42        else if (cc->size() > 1) {
     43            Alt * alt = makeAlt();
     44            for (const CharSetItem & item : *cc) {
     45                alt->push_back(rangeToUTF8(item));
    5546            }
    56             retVal = RE_Simplifier::makeAlt(re_list);
    57         }
    58     }
    59     else if (Name* re_name = dynamic_cast<Name*>(re)) {
    60         retVal = new Name(re_name);
    61     }
    62     else if (dynamic_cast<Start*>(re)) {
    63         retVal = new Start();
    64     }
    65     else if (dynamic_cast<End*>(re)) {
    66         retVal = new End();
    67     }
    68 
    69     return retVal;
     47            re = RE_Simplifier::simplify(alt);
     48            delete cc;
     49        }
     50    }
     51    else if (Rep * rep = dyn_cast<Rep>(re)) {
     52        rep->setRE(toUTF8(rep->getRE()));
     53    }
     54    return re;
    7055}
    7156
     
    7358    int u8len_lo = u8len(item.lo_codepoint);
    7459    int u8len_hi = u8len(item.hi_codepoint);
    75     if (u8len_lo < u8len_hi)
    76     {
     60    if (u8len_lo < u8len_hi) {
    7761        int m = max_of_u8len(u8len_lo);
    78         Alt* alt = new Alt();
    79 
    80         CharSetItem lo_item;
    81         lo_item.lo_codepoint = item.lo_codepoint;
    82         lo_item.hi_codepoint = m;
    83         alt->push_back(rangeToUTF8(lo_item));
    84         CharSetItem hi_item;
    85         hi_item.lo_codepoint = m + 1;
    86         hi_item.hi_codepoint = item.hi_codepoint;
    87         alt->push_back(rangeToUTF8(hi_item));
    88 
     62        Alt* alt = makeAlt();
     63        alt->push_back(rangeToUTF8(CharSetItem(item.lo_codepoint, m)));
     64        alt->push_back(rangeToUTF8(CharSetItem(m + 1, item.hi_codepoint)));
    8965        return alt;
    9066    }
    91     else
    92     {
     67    else {
    9368        return rangeToUTF8_helper(item.lo_codepoint, item.hi_codepoint, 1, u8len_hi);
    9469    }
     
    10681    else if (hbyte == lbyte)
    10782    {
    108         Seq* seq = new Seq();
    109         seq->setType((u8Prefix(hbyte) ? Seq::Byte : Seq::Normal));
     83        Seq* seq = makeSeq();
     84        seq->setType((u8Prefix(hbyte) ? Seq::Type::Byte : Seq::Type::Normal));
    11085        seq->push_back(makeByteClass(hbyte));
    11186        seq->push_back(rangeToUTF8_helper(lo, hi, n+1, hlen));
     
    12095            int hi_floor = (~suffix_mask) & hi;
    12196
    122             Alt* alt = new Alt();
     97            Alt* alt = makeAlt();
    12398            alt->push_back(rangeToUTF8_helper(hi_floor, hi, n, hlen));
    12499            alt->push_back(rangeToUTF8_helper(lo, hi_floor - 1, n, hlen));
     
    129104            int low_ceil = lo | suffix_mask;
    130105
    131             Alt* alt = new Alt();
     106            Alt* alt = makeAlt();
    132107            alt->push_back(rangeToUTF8_helper(low_ceil + 1, hi, n, hlen));
    133108            alt->push_back(rangeToUTF8_helper(lo, low_ceil, n, hlen));
     
    136111        else
    137112        {
    138             Seq* seq = new Seq();
    139             seq->setType((u8Prefix(hbyte) ? Seq::Byte : Seq::Normal));
     113            Seq* seq = makeSeq();
     114            seq->setType((u8Prefix(hbyte) ? Seq::Type::Byte : Seq::Type::Normal));
    140115            seq->push_back(makeByteRange(lbyte, hbyte));
    141116            seq->push_back(rangeToUTF8_helper(lo, hi, n + 1, hlen));
     
    152127CC* UTF8_Encoder::makeByteRange(int lo, int hi)
    153128{
    154     return new CC(lo, hi);
     129    return makeCC(lo, hi);
    155130}
    156131
    157132CC* UTF8_Encoder::makeByteClass(int byteval)
    158133{
    159     return new CC(byteval, byteval);
     134    return makeCC(byteval, byteval);
    160135}
    161136
  • icGREP/icgrep-devel/icgrep/utf8_encoder.h

    r4187 r4194  
    1616{
    1717public:
    18     static RE* toUTF8(RE * re);
     18    static re::RE* toUTF8(re::RE * re);
    1919private:
    20     static RE* rangeToUTF8(const CharSetItem &item);
    21     static RE* rangeToUTF8_helper(int lo, int hi, int n, int hlen);
    22     static CC* makeByteClass(int byteval);
    23     static CC* makeByteRange(int lo, int hi);
     20    static re::RE* rangeToUTF8(const re::CharSetItem &item);
     21    static re::RE* rangeToUTF8_helper(int lo, int hi, int n, int hlen);
     22    static re::CC* makeByteClass(int byteval);
     23    static re::CC* makeByteRange(int lo, int hi);
    2424
    2525    static bool u8Prefix(int cp);
Note: See TracChangeset for help on using the changeset viewer.