Ignore:
Timestamp:
Sep 23, 2014, 3:15:47 PM (5 years ago)
Author:
nmedfort
Message:

Refactored the RE CC class and the CC compiler; moved the RE sources into the re/ subdirectory.

Location:
icGREP/icgrep-devel/icgrep
Files:
1 added
30 edited
1 copied
27 moved

Legend:

Unmodified
Added
Removed
  • icGREP/icgrep-devel/icgrep/CMakeLists.txt

    r4182 r4187  
    4949add_library(PabloADT pe_advance.cpp  pe_all.cpp  pe_and.cpp pe_call.cpp pe_charclass.cpp  pe_matchstar.cpp pe_scanthru.cpp pe_not.cpp  pe_or.cpp  pe_pabloe.cpp  pe_sel.cpp  pe_var.cpp  pe_xor.cpp ps_assign.cpp  ps_if.cpp  ps_pablos.cpp  ps_while.cpp printer_pablos.cpp)
    5050
    51 add_library(RegExpADT re_alt.cpp  re_cc.cpp  re_end.cpp  re_name.cpp re_parser.cpp  re_re.cpp  re_rep.cpp  re_seq.cpp  re_start.cpp parsefailure.cpp  printer_re.cpp)
    52 
     51add_library(RegExpADT re/re_alt.cpp  re/re_cc.cpp  re/re_end.cpp  re/re_name.cpp re/re_parser.cpp  re/re_re.cpp  re/re_rep.cpp  re/re_seq.cpp re/re_start.cpp re/parsefailure.cpp  printer_re.cpp)
    5352
    5453
     
    6059
    6160# add the executable
    62 add_executable(icgrep icgrep.cpp llvm_gen.cpp llvm_gen_helper.cpp utf_encoding.cpp cc_compiler.cpp  cc_compiler_helper.cpp re_simplifier.cpp re_reducer.cpp re_nullable.cpp re_compiler.cpp pbix_compiler.cpp  symbol_generator.cpp utf8_encoder.cpp unicode_categories.h unicode_categories-flat.h unicode_categories-simple.h)
     61add_executable(icgrep icgrep.cpp llvm_gen.cpp llvm_gen_helper.cpp utf_encoding.cpp cc_compiler.cpp  cc_compiler_helper.cpp re/re_simplifier.cpp re/re_reducer.cpp re/re_nullable.cpp re/re_compiler.cpp pbix_compiler.cpp  symbol_generator.cpp utf8_encoder.cpp unicode_categories.h unicode_categories-flat.h unicode_categories-simple.h)
    6362
    6463target_link_libraries (icgrep PabloADT RegExpADT ${REQ_LLVM_LIBRARIES})
  • icGREP/icgrep-devel/icgrep/cc_compiler.cpp

    r4182 r4187  
    99#include "utf_encoding.h"
    1010#include "cc_compiler_helper.h"
     11#include "pe_sel.h"
     12#include "pe_advance.h"
     13#include "pe_all.h"
     14#include "pe_and.h"
     15#include "pe_charclass.h"
     16#include "pe_matchstar.h"
     17#include "pe_not.h"
     18#include "pe_or.h"
     19#include "pe_var.h"
     20#include "pe_xor.h"
    1121
    1222#include <math.h>
    1323#include <utility>
    1424#include <iostream>
    15 #include <sstream>
    1625#include <string>
    1726#include <list>
     
    2130#include <cassert>
    2231#include <stdlib.h>
     32#include <stdexcept>
    2333
    2434CC_Compiler::CC_Compiler(const UTF_Encoding encoding, const std::string basis_pattern, const std::string gensym_pattern)
     
    3242    for (int i = 0; i < mEncoding.getBits(); i++)
    3343    {
    34         std::string b_pattern = bit_var((mEncoding.getBits() -1) - i);
     44        std::string b_pattern = bit_var((mEncoding.getBits() - 1) - i);
    3545        Expression* expr = new Expression();
    3646        expr->expr_string  =  b_pattern;
     
    5767    mapped_value->pablo_expr = new Var(varname);
    5868
    59     std::pair<std::map<std::string, Expression*>::iterator, bool> ret = mCommon_Expression_Map.insert(make_pair(key_value, mapped_value));
     69    std::pair<MapIterator, bool> ret = mCommon_Expression_Map.insert(make_pair(key_value, mapped_value));
    6070
    6171    return ret.first->second;
    6272}
    6373
    64 Expression* CC_Compiler::expr_to_variable(Expression* expr)
    65 {
    66     if (mCommon_Expression_Map.count(expr->expr_string) > 0)
    67     {
    68         std::map<std::string, Expression*>::iterator itGet = mCommon_Expression_Map.find(expr->expr_string);
    69         return itGet->second;
    70     }
    71     else
    72     {
    73         mGenSymCounter++;
    74         std::string sym = mGenSym_Template + std::to_string(mGenSymCounter);
    75         return add_assignment(sym, expr);
     74Expression* CC_Compiler::expr_to_variable(Expression * expr) {
     75    MapIterator itr = mCommon_Expression_Map.find(expr->expr_string);
     76    if (itr != mCommon_Expression_Map.end()) {
     77        return itr->second;
     78    }
     79    else {
     80        return add_assignment(mGenSym_Template + std::to_string(++mGenSymCounter), expr);
    7681    }
    7782}
     
    9095}
    9196
    92 void CC_Compiler::compile_from_map(const std::map<std::string, RE*>& re_map)
     97void CC_Compiler::compile_from_map(const REMap &re_map)
    9398{
    9499    process_re_map(re_map);
    95100}
    96101
    97 void CC_Compiler::process_re_map(const std::map<std::string, RE*>& re_map)
    98 {
    99     for (auto it =  re_map.rbegin(); it != re_map.rend(); ++it)
    100     {
     102void CC_Compiler::process_re_map(const REMap & re_map) {
     103    for (auto it =  re_map.crbegin(); it != re_map.crend(); ++it) {
    101104        process_re(it->second);
    102105    }
    103106}
    104107
    105 void CC_Compiler::process_re(RE* re)
    106 {
    107 
    108     if (Alt* re_alt = dynamic_cast<Alt*>(re)) {
    109         for (RE * re : *re_alt) {
     108void CC_Compiler::process_re(const RE* re) {
     109    if (const Alt* re_alt = dynamic_cast<const Alt*>(re)) {
     110        for (const RE * re : *re_alt) {
    110111            process_re(re);
    111112        }
    112113    }
    113     else if (CC* re_cc = dynamic_cast<CC*>(re)) {
     114    else if (const CC* re_cc = dynamic_cast<const CC*>(re)) {
    114115        cc2pablos(re_cc);
    115116    }
    116     else if (Rep* re_rep = dynamic_cast<Rep*>(re)) {
     117    else if (const Rep* re_rep = dynamic_cast<const Rep*>(re)) {
    117118        process_re(re_rep->getRE());
    118119    }
    119     else if (Seq* re_seq = dynamic_cast<Seq*>(re)) {
    120         for (RE * re : *re_seq) {
     120    else if (const Seq* re_seq = dynamic_cast<const Seq*>(re)) {
     121        for (const RE * re : *re_seq) {
    121122            process_re(re);
    122123        }
     
    152153        bit_no++;
    153154    }
    154 /*
    155     std::cout << "FIRST LOOP:" << std::endl;
    156     for (int i = bit_terms.size() - 1; i >= 0; i--)
    157     {
    158         std::cout << StatementPrinter::ShowPabloE(bit_terms.at(i)) << std::endl;
    159     }
    160 */
     155
    161156    //Reduce the list so that all of the expressions are contained within a single expression.
    162157    while (bit_terms.size() > 1)
     
    171166            new_terms.push_back(bit_terms[bit_terms.size() -1]);
    172167        }
    173 /*
    174         std::cout << "\nNEW TERMS ITERATION:\n" << std::endl;
    175         for (int i = new_terms.size() - 1; i >=0; i--)
    176         {
    177             std::cout <<  StatementPrinter::ShowPabloE(new_terms[i]) << std::endl;
    178         }
    179         std::cout << "\n" << std::endl;
    180 */
    181168        std::vector<PabloE*>::iterator it;
    182169        bit_terms.assign(new_terms.begin(), new_terms.end());
    183170    }
    184 /*
    185     std::cout << "bit_terms.size(): " << bit_terms.size() << std::endl;
    186     std::cout << StatementPrinter::ShowPabloE(bit_terms[0]) << std::endl;
    187 */
    188171    return bit_terms[0];
    189172}
    190173
    191 PabloE* CC_Compiler::char_test_expr(int ch)
     174PabloE* CC_Compiler::char_test_expr(const CodePointType ch)
    192175{
    193176    return bit_pattern_expr(ch, mEncoding.getMask());
    194177}
    195178
    196 PabloE* CC_Compiler::make_range(int n1, int n2)
    197 {
    198     unsigned char diff_bits = n1 ^ n2;
    199     int diff_count = 0;
     179PabloE* CC_Compiler::make_range(const CodePointType n1, const CodePointType n2)
     180{
     181    CodePointType diff_bits = n1 ^ n2;
     182    CodePointType diff_count = 0;
    200183
    201184    while (diff_bits > 0)
     
    207190    if ((n2 < n1) || (diff_count > mEncoding.getBits()))
    208191    {
    209         int n1i = n1;
    210         int n2i = n2;
    211 
    212         std::cout << "n1: " << n1i << std::endl;
    213         std::cout << "n2: " << n2i << std::endl;
    214 
    215         std::cout << "Exception: Bad Range!" << std::endl;
    216         return 0;
    217     }
    218 
    219     int mask = pow(2, diff_count) - 1;
    220 
    221     PabloE* common = bit_pattern_expr(n1 & ~mask, mEncoding.getMask() ^ mask);
     192        throw std::runtime_error(std::string("Bad Range: [") + std::to_string(n1) + "," + std::to_string(n2) + "]");
     193    }
     194
     195    const CodePointType mask0 = (static_cast<CodePointType>(1) << diff_count) - 1;
     196
     197    PabloE* common = bit_pattern_expr(n1 & ~mask0, mEncoding.getMask() ^ mask0);
    222198    if (diff_count == 0) return common;
    223199
    224     mask = pow(2, (diff_count - 1)) - 1;
    225 
    226     PabloE* lo_test = GE_Range(diff_count - 1, n1 & mask);
    227     PabloE* hi_test = LE_Range(diff_count - 1, n2 & mask);
     200    const CodePointType mask1 = (static_cast<CodePointType>(1) << (diff_count - 1)) - 1;
     201
     202    PabloE* lo_test = GE_Range(diff_count - 1, n1 & mask1);
     203    PabloE* hi_test = LE_Range(diff_count - 1, n2 & mask1);
    228204
    229205    return CC_Compiler_Helper::make_and(common, CC_Compiler_Helper::make_sel(make_bitv(diff_count - 1), hi_test, lo_test));
     
    279255      Handling this as a special case avoids an overflow issue with n+1 requiring more than N bits.
    280256    */
    281     if ((n+1) == pow(2, N))
    282     {
     257    if ((n + 1) == (1 << N)) {
    283258        return new All(1); //True.
    284259    }
    285     else
    286     {
    287         return CC_Compiler_Helper::make_not(GE_Range(N, n+1));
    288     }
    289 }
    290 
    291 PabloE* CC_Compiler::char_or_range_expr(CharSetItem charset_item)
    292 {
    293     if (charset_item.lo_codepoint == charset_item.hi_codepoint)
    294     {
    295         return char_test_expr(charset_item.lo_codepoint);
    296     }
    297     else
    298     {
    299         if (charset_item.lo_codepoint < charset_item.hi_codepoint)
    300         {
    301             return make_range(charset_item.lo_codepoint, charset_item.hi_codepoint);
    302         }
    303     }
    304 
    305     std::cout << "Exception: Bad Character Set Item!" << std::endl;
    306     return 0;
    307 }
    308 
    309 PabloE* CC_Compiler::charset_expr(CC* cc)
    310 {
    311     if (cc->getItems().size() == 0)
    312     {
     260    else {
     261        return CC_Compiler_Helper::make_not(GE_Range(N, n + 1));
     262    }
     263}
     264
     265PabloE* CC_Compiler::charset_expr(const CC * cc)
     266{
     267    if (cc->empty()) {
    313268        return new All(0);
    314269    }
    315270
    316     if (cc->getItems().size() > 1)
    317     {
     271    if (cc->size() > 1) {
    318272        bool combine = true;
    319 
    320         for (unsigned long i = 0; i < cc->getItems().size(); i++)
    321         {
    322             CharSetItem item = cc->getItems().at(i);
    323             if (item.lo_codepoint != item.hi_codepoint)
    324             {
     273        for (const CharSetItem & item : *cc) {
     274            if (item.lo_codepoint != item.hi_codepoint) {
    325275                combine = false;
    326276                break;
    327277            }
    328278        }
    329 
    330         if (combine)
    331         {
    332             for (unsigned long i = 0; i < cc->getItems().size() - 1; i ++)
    333             {
    334                 CharSetItem curr_item = cc->getItems().at(i);
    335                 CharSetItem next_item = cc->getItems().at(i + 1);
    336                 if (curr_item.lo_codepoint != next_item.lo_codepoint + 2)
    337                 {
     279        if (combine) {
     280            auto i = cc->cbegin();
     281            for (auto j = i; ++j != cc->cend(); i = j) {
     282                const CharSetItem & curr_item = *i;
     283                const CharSetItem & next_item = *j;
     284                if ((curr_item.lo_codepoint + 2) != next_item.lo_codepoint) {
    338285                    combine  = false;
    339286                    break;
    340287                }
    341288            }
    342         }
    343 
    344         if (combine)
    345         {
    346             CharSetItem first_item = cc->getItems().at(0);
    347             CharSetItem last_item = cc->getItems().at(cc->getItems().size() - 1);
    348             CharSetItem combined_item;
    349             combined_item.lo_codepoint = (last_item.lo_codepoint & 0xFE);
    350             combined_item.hi_codepoint = (first_item.hi_codepoint | 0x01);
    351 
    352             return char_or_range_expr(combined_item);
    353         }
    354     }
    355 
    356     PabloE* e1 = char_or_range_expr(cc->getItems().at(0));
    357     if (cc->getItems().size() > 1)
    358     {
    359         for (unsigned long i = 1; i < cc->getItems().size(); i++)
    360         {
    361             e1 = CC_Compiler_Helper::make_or(e1, char_or_range_expr(cc->getItems().at(i)));
    362         }
    363     }
    364 
    365     return e1;
    366 }
     289            if (combine) {
     290                const CodePointType lo = cc->front().lo_codepoint;
     291                const CodePointType hi = cc->back().lo_codepoint;
     292                PabloE * expr = make_range(lo & 0xFE, hi | 0x01);
     293                // should this be here? was in the prototype but not icgrep
     294                expr = CC_Compiler_Helper::make_and(expr, new All((lo & 1) == 1 ? 0 : 1));
     295                return expr;
     296            }
     297        }
     298    }
     299    PabloE * expr = nullptr;
     300    for (const CharSetItem & item : *cc) {
     301        PabloE * temp = char_or_range_expr(item.lo_codepoint, item.hi_codepoint);
     302        expr = (expr == nullptr) ? temp : CC_Compiler_Helper::make_or(expr, temp);
     303    }
     304    return expr;
     305}
     306
     307inline PabloE * CC_Compiler::char_or_range_expr(const CodePointType lo, const CodePointType hi) {
     308    if (lo == hi) {
     309        return char_test_expr(lo);
     310    }
     311    else if (lo < hi) {
     312        return make_range(lo, hi);
     313    }
     314    throw std::runtime_error(std::string("Invalid Character Set Range: [") + std::to_string(lo) + "," + std::to_string(hi) + "]");
     315}
     316
    367317
    368318Expression* CC_Compiler::expr2pabloe(PabloE* expr)
     
    382332            retExpr->pablo_expr = new All(1);
    383333        }
    384         else if (all->getNum() ==0)
     334        else if (all->getNum() == 0)
    385335        {
    386336            retExpr->expr_string = "All(0)";
     
    450400}
    451401
    452 void CC_Compiler::cc2pablos(CC* cc)
     402void CC_Compiler::cc2pablos(const CC * cc)
    453403{
    454404    add_assignment(cc->getName(), expr2pabloe(charset_expr(cc)));
  • icGREP/icgrep-devel/icgrep/cc_compiler.h

    r4132 r4187  
    1414#include "ps_pablos.h"
    1515#include "pe_pabloe.h"
    16 #include "pe_sel.h"
    17 #include "pe_advance.h"
    18 #include "pe_all.h"
    19 #include "pe_and.h"
    20 #include "pe_charclass.h"
    21 #include "pe_matchstar.h"
    22 #include "pe_not.h"
    23 #include "pe_or.h"
    24 #include "pe_var.h"
    25 #include "pe_xor.h"
    26 #include "re_cc.h"
     16#include "re/re_cc.h"
    2717
    2818struct Expression{
     
    3121};
    3222
     23class CC_Compiler{
     24    typedef std::map<std::string, RE*>          REMap;
     25    typedef std::map<std::string, Expression*>  ExpressionMap;
     26    typedef ExpressionMap::iterator             MapIterator;
    3327
    34 class CC_Compiler
    35 {
    3628public:
    3729    CC_Compiler(const UTF_Encoding encoding, const std::string basis_pattern, const std::string gensym_pattern);
    3830    std::string compile1(CC* cc);   
    39     void compile_from_map(const std::map<std::string, RE*>& re_map);   
     31    void compile_from_map(const REMap & re_map);
    4032    std::list<PabloS*> get_compiled();
    4133private:
    42     void process_re_map(const std::map<std::string, RE*>& re_map);
    43     void process_re(RE* re);
     34    void process_re_map(const REMap &re_map);
     35    void process_re(const RE *re);
    4436    std::string bit_var(int n);
    4537    PabloE* make_bitv(int n);
    4638    PabloE* bit_pattern_expr(int pattern, int selected_bits);
    47     PabloE* char_test_expr(int ch);
    48     PabloE* make_range(int n1, int n2);
     39    PabloE* char_test_expr(const CodePointType ch);
     40    PabloE* make_range(const CodePointType n1, const CodePointType n2);
    4941    PabloE* GE_Range(int N, int n);
    5042    PabloE* LE_Range(int N, int n);
    51     PabloE* char_or_range_expr(CharSetItem charset_item);
    52     PabloE* charset_expr(CC* cc);
     43    PabloE* char_or_range_expr(const CodePointType lo, const CodePointType hi);
     44    PabloE* charset_expr(const CC *cc);
    5345    Expression* expr2pabloe(PabloE* expr);
    54     void cc2pablos(CC* cc);
     46    void cc2pablos(const CC *cc);
    5547
    5648    UTF_Encoding mEncoding;
     
    6456    int mGenSymCounter;
    6557    std::list<PabloS*> mStmtsl;
    66     std::map<std::string, Expression*> mCommon_Expression_Map;
     58    ExpressionMap mCommon_Expression_Map;
    6759};
    6860
  • icGREP/icgrep-devel/icgrep/cc_compiler_helper.cpp

    r4034 r4187  
    6767        }
    6868    }
    69     else if (equal_exprs(expr1, expr2 ))
     69    else if (equal_exprs(expr1, expr2))
    7070    {
    7171        return expr1;
     
    257257*/
    258258
    259 bool CC_Compiler_Helper::equal_exprs(PabloE *expr1, PabloE *expr2)
    260 {
    261     if (All* all_expr1 = dynamic_cast<All*>(expr1))
    262     {
    263         if (all_expr1->getNum() == 1)
    264         {
    265             if (All* all_expr2 = dynamic_cast<All*>(expr2))
    266             {
    267                 if (all_expr2->getNum() == 1)
    268                 {
    269                     return true;
    270                 }
    271                 else
    272                 {
    273                     return false;
    274                 }
    275             }
    276             else
    277             {
    278                 return false;
    279             }
    280         }
    281         else if (all_expr1->getNum() == 0)
    282         {
    283             if (All* all_expr2 = dynamic_cast<All*>(expr2))
    284             {
    285                 if (all_expr2->getNum() == 1)
    286                 {
    287                     return false;
    288                 }
    289                 else
    290                 {
    291                     return true;
    292                 }
    293             }
    294             else
    295             {
    296                 return false;
    297             }
    298         }
    299     }
    300 
    301     if (Var* var_expr1 = dynamic_cast<Var*>(expr1))
    302     {
    303         if (Var* var_expr2 = dynamic_cast<Var*>(expr2))
     259bool CC_Compiler_Helper::equal_exprs(const PabloE * expr1, const PabloE * expr2)
     260{
     261    if (const All * all_expr1 = dynamic_cast<const All*>(expr1))
     262    {
     263        if (const All * all_expr2 = dynamic_cast<const All*>(expr2))
     264        {
     265            return all_expr1->getNum() == all_expr2->getNum();
     266        }
     267    }
     268    else if (const Var * var_expr1 = dynamic_cast<const Var*>(expr1))
     269    {
     270        if (const Var * var_expr2 = dynamic_cast<const Var*>(expr2))
    304271        {
    305272            return (var_expr1->getVar() == var_expr2->getVar());
    306273        }
    307274    }
    308 
    309     if (Not* not_expr1 = dynamic_cast<Not*>(expr1))
    310     {
    311         if (Not* not_expr2 = dynamic_cast<Not*>(expr2))
     275    else if (const Not* not_expr1 = dynamic_cast<const Not*>(expr1))
     276    {
     277        if (const Not* not_expr2 = dynamic_cast<const Not*>(expr2))
    312278        {
    313279            return equal_exprs(not_expr1->getExpr(), not_expr2->getExpr());
    314280        }
    315281    }
    316 
    317     if (And* and_expr1 = dynamic_cast<And*>(expr1))
    318     {
    319         if (And* and_expr2 = dynamic_cast<And*>(expr2))
     282    else if (const And* and_expr1 = dynamic_cast<const And*>(expr1))
     283    {
     284        if (const And* and_expr2 = dynamic_cast<const And*>(expr2))
    320285        {
    321286            if (equal_exprs(and_expr1->getExpr1(), and_expr2->getExpr1()))
     
    327292                return equal_exprs(and_expr1->getExpr2(), and_expr2->getExpr1());
    328293            }
    329             else
    330                 return false;
    331         }
    332     }
    333 
    334     if (Or* or_expr1 = dynamic_cast<Or*>(expr1))
    335     {
    336         if (Or* or_expr2 = dynamic_cast<Or*>(expr2))
     294        }
     295    }
     296    else if (const Or * or_expr1 = dynamic_cast<const Or*>(expr1))
     297    {
     298        if (const Or* or_expr2 = dynamic_cast<const Or*>(expr2))
    337299        {
    338300            if (equal_exprs(or_expr1->getExpr1(), or_expr2->getExpr1()))
     
    344306                return equal_exprs(or_expr1->getExpr2(), or_expr2->getExpr1());
    345307            }
    346             else
    347                 return false;
    348         }
    349     }
    350 
    351     if (Xor* xor_expr1 = dynamic_cast<Xor*>(expr1))
    352     {
    353         if (Xor* xor_expr2 = dynamic_cast<Xor*>(expr2))
     308        }
     309    }
     310    else if (const Xor * xor_expr1 = dynamic_cast<const Xor *>(expr1))
     311    {
     312        if (const Xor * xor_expr2 = dynamic_cast<const Xor *>(expr2))
    354313        {
    355314            if (equal_exprs(xor_expr1->getExpr1(), xor_expr2->getExpr1()))
     
    361320                return equal_exprs(xor_expr1->getExpr2(), xor_expr2->getExpr1());
    362321            }
    363             else
    364                 return false;
    365         }
    366     }
    367 
    368     if (Sel* sel_expr1 = dynamic_cast<Sel*>(expr1))
    369     {
    370         if (Sel* sel_expr2 = dynamic_cast<Sel*>(expr2))
     322        }
     323    }
     324    else if (const Sel* sel_expr1 = dynamic_cast<const Sel*>(expr1))
     325    {
     326        if (const Sel* sel_expr2 = dynamic_cast<const Sel*>(expr2))
    371327        {
    372328            if (equal_exprs(sel_expr1->getIf_expr(), sel_expr2->getIf_expr()))
     
    376332                    return equal_exprs(sel_expr1->getF_expr(), sel_expr2->getF_expr());
    377333                }
    378                 else
    379                     return false;
    380             }
    381             else
    382                 return false;
     334            }
    383335        }
    384336    }
  • icGREP/icgrep-devel/icgrep/cc_compiler_helper.h

    r3850 r4187  
    1919    static PabloE* make_sel(PabloE* if_expr, PabloE* t_expr, PabloE* f_expr);
    2020    static PabloE* make_xor(PabloE* expr1, PabloE* expr2);
    21     static bool equal_exprs(PabloE* expr1, PabloE* expr2);
     21    static bool equal_exprs(const PabloE *expr1, const PabloE *expr2);
    2222private:
    2323    CC_Compiler_Helper();
  • icGREP/icgrep-devel/icgrep/icgrep.cpp

    r4151 r4187  
    88
    99#include "utf_encoding.h"
    10 #include "re_compiler.h"
     10#include "re/re_compiler.h"
    1111
    1212#include <fstream>
     
    2929#include <simd-lib/buffer.hpp>
    3030#include <simd-lib/bitblock_iterator.hpp>
     31
     32#include "hrtime.h"
    3133
    3234// mmap system
  • icGREP/icgrep-devel/icgrep/llvm_gen.h

    r4151 r4187  
    1212//define this indicates that we use llvm.uadd.with.overflow for genAddWithCarry
    1313#define USE_UADD_OVERFLOW
    14 
    15 //Regular Expressions
    16 #include "re_re.h"
    17 #include "re_cc.h"
    18 #include "re_name.h"
    19 #include "re_start.h"
    20 #include "re_end.h"
    21 #include "re_seq.h"
    22 #include "re_alt.h"
    23 #include "re_rep.h"
    2414
    2515//Pablo Expressions
  • icGREP/icgrep-devel/icgrep/llvm_gen_helper.cpp

    r4086 r4187  
    66
    77#include "llvm_gen_helper.h"
     8
     9//Pablo Expressions
     10#include "pe_pabloe.h"
     11#include "pe_advance.h"
     12#include "pe_and.h"
     13#include "pe_charclass.h"
     14#include "pe_not.h"
     15#include "pe_or.h"
     16#include "pe_matchstar.h"
     17#include "pe_scanthru.h"
     18
     19//Pablo Statements
     20#include "ps_pablos.h"
     21#include "ps_assign.h"
     22#include "ps_if.h"
     23#include "ps_while.h"
    824
    925LLVM_Generator_Helper::LLVM_Generator_Helper(){}
  • icGREP/icgrep-devel/icgrep/llvm_gen_helper.h

    r3955 r4187  
    88#define LLVM_GENERATOR_HELPER_H
    99
    10 //Regular Expressions
    11 #include "re_re.h"
    12 #include "re_cc.h"
    13 #include "re_start.h"
    14 #include "re_end.h"
    15 #include "re_seq.h"
     10#include <list>
    1611
    17 //Pablo Expressions
    18 #include "pe_pabloe.h"
    19 #include "pe_advance.h"
    20 #include "pe_and.h"
    21 #include "pe_charclass.h"
    22 #include "pe_not.h"
    23 #include "pe_or.h"
    24 #include "pe_matchstar.h"
    25 #include "pe_scanthru.h"
    26 
    27 //Pablo Statements
    28 #include "ps_pablos.h"
    29 #include "ps_assign.h"
    30 #include "ps_if.h"
    31 #include "ps_while.h"
    32 
     12class PabloS;
     13class PabloE;
    3314
    3415class LLVM_Generator_Helper
  • icGREP/icgrep-devel/icgrep/pbix_compiler.cpp

    r4182 r4187  
    181181    else if (Rep* rep = dynamic_cast<Rep*>(re))
    182182    {
    183         if ((dynamic_cast<Name*>(rep->getRE()) != 0) && (rep->getLB() == 0) && (rep->getUB()== UNBOUNDED_REP))
     183        if ((dynamic_cast<Name*>(rep->getRE()) != 0) && (rep->getLB() == 0) && (rep->getUB()== Rep::UNBOUNDED_REP))
    184184        {
    185185            Name* rep_name = dynamic_cast<Name*>(rep->getRE());
     
    213213            cg_state.newsym = gs_retVal;
    214214        }
    215         else if (rep->getUB() == UNBOUNDED_REP)
     215        else if (rep->getUB() == Rep::UNBOUNDED_REP)
    216216        {
    217217            cg_state = UnboundedRep_helper(rep->getRE(), rep->getLB(), cg_state);
    218218        }
    219         else if (rep->getUB() != UNBOUNDED_REP)
     219        else if (rep->getUB() != Rep::UNBOUNDED_REP)
    220220        {
    221221            cg_state = BoundedRep_helper(rep->getRE(), rep->getLB(), rep->getUB(), cg_state);
  • icGREP/icgrep-devel/icgrep/pbix_compiler.h

    r4182 r4187  
    99
    1010//Regular Expressions
    11 #include "re_re.h"
    12 #include "re_name.h"
    13 #include "re_start.h"
    14 #include "re_end.h"
    15 #include "re_seq.h"
    16 #include "re_alt.h"
    17 #include "re_rep.h"
     11#include "re/re_re.h"
     12#include "re/re_name.h"
     13#include "re/re_start.h"
     14#include "re/re_end.h"
     15#include "re/re_seq.h"
     16#include "re/re_alt.h"
     17#include "re/re_rep.h"
    1818
    1919//Pablo Expressions
  • icGREP/icgrep-devel/icgrep/pe_all.cpp

    r3850 r4187  
    88
    99All::All(int num)
     10: mNum(num)
    1011{
    11     mNum = num;
     12
    1213}
    1314
    1415All::~All(){}
    1516
    16 int All::getNum()
     17int All::getNum() const
    1718{
    1819    return mNum;
  • icGREP/icgrep-devel/icgrep/pe_all.h

    r3850 r4187  
    1515    All(int num);
    1616    ~All();
    17     int getNum();
     17    int getNum() const;
    1818    void setNum(int num);
    1919private:
  • icGREP/icgrep-devel/icgrep/pe_and.cpp

    r3850 r4187  
    77#include "pe_and.h"
    88
    9 And::And(PabloE *expr1, PabloE *expr2)
     9And::And(PabloE * expr1, PabloE * expr2)
     10: mExpr1(expr1)
     11, mExpr2(expr2)
    1012{
    11     mExpr1 = expr1;
    12     mExpr2 = expr2;
     13
    1314}
    1415
     
    1920}
    2021
    21 PabloE* And::getExpr1()
     22PabloE* And::getExpr1() const
    2223{
    2324    return mExpr1;
    2425}
    2526
    26 PabloE* And::getExpr2()
     27PabloE* And::getExpr2() const
    2728{
    2829    return mExpr2;
  • icGREP/icgrep-devel/icgrep/pe_and.h

    r3850 r4187  
    1515    And(PabloE* expr1, PabloE* expr2);
    1616    ~And();
    17     PabloE* getExpr1();
    18     PabloE* getExpr2();
     17    PabloE* getExpr1() const;
     18    PabloE* getExpr2() const;
    1919private:
    2020    PabloE* mExpr1;
  • icGREP/icgrep-devel/icgrep/pe_not.cpp

    r3850 r4187  
    1717}
    1818
    19 PabloE* Not::getExpr()
     19PabloE* Not::getExpr() const
    2020{
    2121    return mExpr;
  • icGREP/icgrep-devel/icgrep/pe_not.h

    r3850 r4187  
    1515    Not(PabloE* expr);
    1616    ~Not();
    17     PabloE* getExpr();
     17    PabloE* getExpr() const;
    1818private:
    1919    PabloE* mExpr;
  • icGREP/icgrep-devel/icgrep/pe_or.cpp

    r3850 r4187  
    88
    99Or::Or(PabloE* expr1, PabloE* expr2)
     10: mExpr1(expr1)
     11, mExpr2(expr2)
    1012{
    11     mExpr1 = expr1;
    12     mExpr2 = expr2;
     13
    1314}
    1415
     
    1920}
    2021
    21 PabloE* Or::getExpr1()
     22PabloE* Or::getExpr1() const
    2223{
    2324    return mExpr1;
    2425}
    2526
    26 PabloE* Or::getExpr2()
     27PabloE* Or::getExpr2() const
    2728{
    2829    return mExpr2;
  • icGREP/icgrep-devel/icgrep/pe_or.h

    r3850 r4187  
    1515    Or(PabloE* expr1, PabloE* expr2);
    1616    ~Or();
    17     PabloE* getExpr1();
    18     PabloE* getExpr2();
     17    PabloE* getExpr1() const;
     18    PabloE* getExpr2() const;
    1919private:
    2020    PabloE* mExpr1;
  • icGREP/icgrep-devel/icgrep/pe_sel.cpp

    r3850 r4187  
    88
    99Sel::Sel(PabloE* if_expr, PabloE* t_expr, PabloE* f_expr)
     10: mIf_expr(if_expr)
     11, mT_expr(t_expr)
     12, mF_expr(f_expr)
    1013{
    11     mIf_expr = if_expr;
    12     mT_expr = t_expr;
    13     mF_expr = f_expr;
     14
    1415}
    1516
     
    2122}
    2223
    23 PabloE* Sel::getIf_expr()
     24PabloE* Sel::getIf_expr() const
    2425{
    2526    return mIf_expr;
    2627}
    2728
    28 PabloE* Sel::getT_expr()
     29PabloE* Sel::getT_expr() const
    2930{
    3031    return mT_expr;
    3132}
    3233
    33 PabloE* Sel::getF_expr()
     34PabloE* Sel::getF_expr() const
    3435{
    3536    return mF_expr;
  • icGREP/icgrep-devel/icgrep/pe_sel.h

    r3850 r4187  
    1515    Sel(PabloE* if_expr, PabloE* t_expr, PabloE* f_expr);
    1616    ~Sel();
    17     PabloE* getIf_expr();
    18     PabloE* getT_expr();
    19     PabloE* getF_expr();
     17    PabloE* getIf_expr() const;
     18    PabloE* getT_expr() const;
     19    PabloE* getF_expr() const;
    2020private:
    2121    PabloE* mIf_expr;
  • icGREP/icgrep-devel/icgrep/pe_var.cpp

    r3850 r4187  
    88
    99Var::Var(std::string var)
     10: mVar(var)
    1011{
    11     mVar = var;
     12
    1213}
    1314
    1415Var::~Var(){}
    1516
    16 std::string Var::getVar()
     17std::string Var::getVar() const
    1718{
    1819    return mVar;
  • icGREP/icgrep-devel/icgrep/pe_var.h

    r3850 r4187  
    1717    ~Var();
    1818    void setVar(std::string var);
    19     std::string getVar();
     19    std::string getVar() const;
    2020private:
    2121    std::string mVar;
  • icGREP/icgrep-devel/icgrep/pe_xor.cpp

    r3850 r4187  
    88
    99Xor::Xor(PabloE *expr1, PabloE *expr2)
     10: mExpr1(expr1)
     11, mExpr2(expr2)
    1012{
    11     mExpr1 = expr1;
    12     mExpr2 = expr2;
     13
    1314}
    1415
     
    1920}
    2021
    21 PabloE* Xor:: getExpr1()
     22PabloE* Xor:: getExpr1() const
    2223{
    2324    return mExpr1;
    2425}
    2526
    26 PabloE* Xor:: getExpr2()
     27PabloE* Xor:: getExpr2() const
    2728{
    2829    return mExpr2;
  • icGREP/icgrep-devel/icgrep/pe_xor.h

    r3850 r4187  
    1515    Xor(PabloE* expr1, PabloE* expr2);
    1616    ~Xor();
    17     PabloE* getExpr1();
    18     PabloE* getExpr2();
     17    PabloE* getExpr1() const;
     18    PabloE* getExpr2() const;
    1919private:
    2020    PabloE* mExpr1;
  • icGREP/icgrep-devel/icgrep/printer_pablos.h

    r3984 r4187  
    99
    1010//Regular Expressions
    11 #include "re_re.h"
    12 #include "re_cc.h"
    13 #include "re_start.h"
    14 #include "re_end.h"
    15 #include "re_seq.h"
     11#include "re/re_re.h"
     12#include "re/re_cc.h"
     13#include "re/re_start.h"
     14#include "re/re_end.h"
     15#include "re/re_seq.h"
    1616
    1717//Pablo Expressions
  • icGREP/icgrep-devel/icgrep/printer_re.cpp

    r4182 r4187  
    77#include "printer_re.h"
    88
     9//Regular Expressions
     10#include "re/re_re.h"
     11#include "re/re_alt.h"
     12#include "re/re_cc.h"
     13#include "re/re_name.h"
     14#include "re/re_end.h"
     15#include "re/re_rep.h"
     16#include "re/re_seq.h"
     17#include "re/re_start.h"
    918
    10 std::string Printer_RE::PrintRE(RE * re)
     19
     20const std::string Printer_RE::PrintRE(const RE * re)
    1121{
    1222    std::string retVal = "";
     
    1525        retVal = "--> RE NullPtr! <--";
    1626    }
    17     else if (Alt* re_alt = dynamic_cast<Alt*>(re))
     27    else if (const Alt* re_alt = dynamic_cast<const Alt*>(re))
    1828    {
    1929        retVal += "(Alt[";
    20         for (RE * re : *re_alt) {
    21             retVal += PrintRE(re) + ",";
     30        bool comma = false;
     31        for (const RE * re : *re_alt) {
     32            if (comma) {
     33                retVal += ',';
     34            }
     35            retVal += PrintRE(re);
     36            comma = true;
    2237        }
    23         retVal = retVal.substr(0, retVal.size() - 1);
    2438        retVal += "])";
    2539    }
    26     else if (CC* re_cc = dynamic_cast<CC*>(re))
     40    else if (const CC* re_cc = dynamic_cast<const CC*>(re))
    2741    {
    28         retVal += "CC \"";
     42        retVal = "CC \"";
    2943        retVal += re_cc->getName();
    3044        retVal += "\" ";
    3145
    32         for (const CharSetItem & item : re_cc->getItems())
     46        for (const CharSetItem & item : *re_cc)
    3347        {
    3448            retVal += "[";
     
    3852        }
    3953    }
    40     else if (Name* re_name = dynamic_cast<Name*>(re))
     54    else if (const Name* re_name = dynamic_cast<const Name*>(re))
    4155    {
    42         retVal += "Name \"";
     56        retVal = "Name \"";
    4357        retVal += re_name->getName();
    4458        retVal += "\" ";
    4559    }
    46     else if (dynamic_cast<End*>(re))
     60    else if (dynamic_cast<const End*>(re))
    4761    {
    48         retVal += "End";
     62        retVal = "End";
    4963    }
    50     else if (Rep* re_rep = dynamic_cast<Rep*>(re))
     64    else if (const Rep* re_rep = dynamic_cast<const Rep*>(re))
    5165    {
    52         retVal += "Rep("  + PrintRE(re_rep->getRE()) + "," + std::to_string(re_rep->getLB()) + ",";
    53         retVal += (re_rep->getUB() == UNBOUNDED_REP ? "Unbounded" : "UpperBound(" + std::to_string(re_rep->getUB()) + ")");
     66        retVal = "Rep(";
     67        retVal.append(PrintRE(re_rep->getRE()));
     68        retVal.append(",");
     69        retVal.append(std::to_string(re_rep->getLB()));
     70        retVal.append(",");
     71        if (re_rep->getUB() == Rep::UNBOUNDED_REP) {
     72            retVal.append("Unbounded");
     73        }
     74        else {
     75            retVal.append(std::to_string(re_rep->getUB()));           
     76        }
     77        retVal.append(")");
    5478    }
    55     else if (Seq* re_seq = dynamic_cast<Seq*>(re))
     79    else if (const Seq* re_seq = dynamic_cast<const Seq*>(re))
    5680    {
    57         retVal += "(Seq[";
    58         for (RE * re : *re_seq) {
    59             retVal += PrintRE(re) + ",";
     81        retVal = "(Seq[";
     82        bool comma = false;
     83        for (const RE * re : *re_seq) {
     84            if (comma) {
     85                retVal.append(",");
     86            }
     87            retVal.append(PrintRE(re));
     88            comma = true;
    6089        }
    61         retVal = retVal.substr(0, retVal.size() - 1);
    62         retVal += "])";
     90        retVal.append("])");
    6391    }
    64     else if (dynamic_cast<Start*>(re))
     92    else if (dynamic_cast<const Start*>(re))
    6593    {
    66         retVal += "Start";
     94        retVal = "Start";
    6795    }
    6896    else
    6997    {
    70         retVal += "--> RE Unknown <--";
     98        retVal = "--> RE Unknown <--";
    7199    }
    72 
    73     return retVal;
     100    return std::move(retVal);
    74101}
  • icGREP/icgrep-devel/icgrep/printer_re.h

    r3984 r4187  
    88#define PRINTER_RE_H
    99
    10 //Regular Expressions
    11 #include "re_re.h"
    12 #include "re_alt.h"
    13 #include "re_cc.h"
    14 #include "re_name.h"
    15 #include "re_end.h"
    16 #include "re_rep.h"
    17 #include "re_seq.h"
    18 #include "re_start.h"
     10#include <string>
    1911
    20 #include <iostream>
    21 #include <string>
    22 #include <sstream>
    23 #include <list>
    24 
     12class RE;
    2513
    2614class Printer_RE
    2715{
    2816public:
    29     static std::string PrintRE(RE* re);
     17    static const std::string PrintRE(const RE *re);
    3018};
    3119
  • icGREP/icgrep-devel/icgrep/re/re_cc.cpp

    r4182 r4187  
    66
    77#include "re_cc.h"
     8#include <assert.h>
     9#include <atomic>
    810
    9 int CC::msCSIidx = 0;
     11CC::CC() {
    1012
    11 CC::CC()
    12 {
    13     gensym_name();
    1413}
    1514
    16 CC::CC(int codepoint)
    17 {
    18     gensym_name();
    19     insert1(codepoint);
     15CC::CC(const CodePointType codepoint) {
     16    insert(codepoint);
    2017}
    2118
    22 CC::CC(int lo_codepoint, int hi_codepoint)
    23 {
    24     gensym_name();
     19CC::CC(const CodePointType lo_codepoint, const CodePointType hi_codepoint) {
    2520    insert_range(lo_codepoint, hi_codepoint);
    2621}
    2722
    28 CC::CC(CC *cc1, CC *cc2)
    29 {
    30     gensym_name();
    31     mSparseCharSet = cc2->getItems();
    32     joinCharSets(cc1->getItems());
     23CC::CC(const CC * cc1, const CC * cc2) {
     24    mSparseCharSet.assign(cc1->cbegin(), cc1->cend());
     25    join(cc2->mSparseCharSet);
    3326}
    3427
    35 CC::~CC(){}
     28CC::~CC() {
    3629
    37 std::vector<CharSetItem> CC::getItems()
    38 {
    39     return mSparseCharSet;
    4030}
    4131
    42 std::string CC::getName()
    43 {
     32std::string CC::getName() const {
    4433    std::string name = "CC";
    45 
    46     std::vector<CharSetItem>::iterator it;
    47     for (it = mSparseCharSet.begin(); it != mSparseCharSet.end(); ++it)
    48     {
    49         name += "_" + std::to_string(it->lo_codepoint);
    50         name += "." + std::to_string(it->hi_codepoint);
     34    for (const CharSetItem & i : mSparseCharSet) {
     35        name += "_" + std::to_string(i.lo_codepoint);
     36        name += "." + std::to_string(i.hi_codepoint);
    5137    }
    52 
    5338    return name;
    5439}
    5540
    56 std::string CC::getId()
    57 {
    58     return mId;
     41void CC::join(const CharSetVector & other) {
     42    for (const CharSetItem & i : other) {
     43        insert_range(i.lo_codepoint, i.hi_codepoint);
     44    }
    5945}
    6046
    61 bool CC::is_member(int codepoint)
    62 {
    63     return is_member_helper(codepoint, mSparseCharSet.size() - 1);
     47void CC::insert_range(const CodePointType lo_codepoint, const CodePointType hi_codepoint) {
     48    CharSetItem item(lo_codepoint, hi_codepoint);
     49    for (auto i = mSparseCharSet.begin(); i != mSparseCharSet.end(); ) {
     50        CharSetItem & range = *i;
     51        if (item.hi_codepoint < range.lo_codepoint - 1) {
     52            mSparseCharSet.insert(i, item);
     53            return;
     54        }
     55        else if (item.lo_codepoint > range.hi_codepoint + 1) {
     56            ++i;
     57        }
     58        else {
     59            // ranges overlap; expand the range to include the prior one and
     60            // remove the old one from the list
     61            item.lo_codepoint = std::min(range.lo_codepoint, item.lo_codepoint);
     62            item.hi_codepoint = std::max(range.hi_codepoint, item.hi_codepoint);
     63            i = mSparseCharSet.erase(i);
     64        }
     65    }
     66    mSparseCharSet.push_back(item);
    6467}
    6568
    66 bool CC::is_member_helper(int codepoint, int idx)
    67 {
    68     if (idx == -1)
    69     {
    70         return false;
     69void CC::negate() {
     70    CharSetVector negated;
     71    negated.reserve(mSparseCharSet.size() + 1);
     72    CodePointType lo_codepoint = 0;
     73    for (const CharSetItem & item : mSparseCharSet) {
     74        negated.push_back(std::move(CharSetItem(lo_codepoint, item.lo_codepoint - 1)));
     75        lo_codepoint = item.hi_codepoint + 1;
    7176    }
    72     else
    73     {
    74         CharSetItem item = mSparseCharSet.at(idx);
     77    if (lo_codepoint <= UNICODE_MAX) {
     78        negated.push_back(std::move(CharSetItem(lo_codepoint, UNICODE_MAX)));
     79    }
     80    mSparseCharSet.assign(negated.begin(), negated.end());
     81}
    7582
    76         if (codepoint < item.lo_codepoint)
    77         {
    78             return false;
     83void CC::remove_range(const CodePointType lo_codepoint, const CodePointType hi_codepoint) {
     84    for (auto i = mSparseCharSet.begin(); i != mSparseCharSet.end(); ) {
     85        CharSetItem & range = *i;
     86        if (lo_codepoint > range.hi_codepoint + 1) {
     87            ++i;
    7988        }
    80         else if (codepoint > item.hi_codepoint)
    81         {
    82             idx--;
    83             return is_member_helper(codepoint, idx);
     89        else if (hi_codepoint < range.lo_codepoint - 1) {
     90            break;
    8491        }
    85         else
    86         {
    87             return true;
     92        else if (lo_codepoint <= range.lo_codepoint && hi_codepoint >= range.hi_codepoint) {
     93            i = mSparseCharSet.erase(i);
     94        }
     95        else if (lo_codepoint <= range.lo_codepoint) {
     96            range.lo_codepoint = hi_codepoint + 1;
     97            break;
     98        }
     99        else if (hi_codepoint >= range.hi_codepoint) {
     100            range.hi_codepoint = lo_codepoint - 1;
     101            ++i;
     102        }
     103        else {
     104            CharSetItem item(hi_codepoint + 1, range.hi_codepoint);
     105            range.hi_codepoint = lo_codepoint - 1;
     106            mSparseCharSet.insert(++i, std::move(item));
     107            break;
    88108        }
    89109    }
    90110}
    91 
    92 void CC::joinCharSets(std::vector<CharSetItem> items1)
    93 {
    94     joinCharSets_helper(items1, items1.size() - 1);
    95 }
    96 
    97 void CC::joinCharSets_helper(std::vector<CharSetItem> items1, int idx)
    98 {
    99     if (idx > -1)
    100     {
    101         CharSetItem item = items1.at(idx);
    102         insert_range(item.lo_codepoint, item.hi_codepoint);
    103         idx--;
    104         joinCharSets_helper(items1, idx);
    105     }
    106 }
    107 
    108 void CC::insert1(int codepoint)
    109 {
    110     insert_range(codepoint, codepoint);
    111 }
    112 
    113 void CC::insert_range(int lo_codepoint, int hi_codepoint)
    114 {
    115     insert_range_helper(lo_codepoint, hi_codepoint, mSparseCharSet.size() - 1);
    116 }
    117 
    118 void CC::insert_range_helper(int lo_codepoint, int hi_codepoint, int idx)
    119 {
    120     if (idx == -1)
    121     {
    122         CharSetItem new_item;
    123         new_item.lo_codepoint = lo_codepoint;
    124         new_item.hi_codepoint = hi_codepoint;
    125         std::vector<CharSetItem>::iterator it;
    126         it = mSparseCharSet.begin();
    127         mSparseCharSet.insert(it, new_item);
    128     }
    129     else
    130     {
    131         CharSetItem item = mSparseCharSet.at(idx);
    132 
    133         if (hi_codepoint < item.lo_codepoint - 1)
    134         {
    135             CharSetItem new_item;
    136             new_item.lo_codepoint = lo_codepoint;
    137             new_item.hi_codepoint = hi_codepoint;
    138             std::vector<CharSetItem>::iterator it;
    139             it = mSparseCharSet.begin();
    140             mSparseCharSet.insert(it + idx + 1, new_item);
    141         }
    142         else if (lo_codepoint > item.hi_codepoint + 1)
    143         {
    144             idx--;
    145             insert_range_helper(lo_codepoint, hi_codepoint, idx);
    146         }
    147         else
    148         {
    149             int overlap_lo = item.lo_codepoint;
    150             int overlap_hi = item.hi_codepoint;
    151             std::vector<CharSetItem>::iterator it;
    152             it = mSparseCharSet.begin();
    153             mSparseCharSet.erase(it + idx);
    154             idx--;
    155             insert_range_helper(std::min(overlap_lo, lo_codepoint), std::max(overlap_hi, hi_codepoint), idx);
    156         }
    157     }
    158 }
    159 
    160 void CC::negate_class()
    161 {
    162     negate_class_helper(mSparseCharSet.size() - 1, 0);
    163 }
    164 
    165 void CC::negate_class_helper(int idx, int b)
    166 {
    167     if (idx == -1)
    168     {
    169         if (b <= mUnicodeMax)
    170         {
    171             CharSetItem new_item;
    172 
    173             new_item.lo_codepoint = b;
    174             new_item.hi_codepoint = mUnicodeMax;
    175             std::vector<CharSetItem>::iterator it;
    176             it = mSparseCharSet.begin();
    177             mSparseCharSet.insert(it, new_item);
    178         }
    179     }
    180     else
    181     {
    182         CharSetItem item = mSparseCharSet.at(idx);
    183 
    184         if (b < item.lo_codepoint)
    185         {
    186             CharSetItem new_item;
    187 
    188             new_item.lo_codepoint = b;
    189             new_item.hi_codepoint = item.lo_codepoint - 1;
    190             std::vector<CharSetItem>::iterator it;
    191             it = mSparseCharSet.begin();
    192             mSparseCharSet.erase(it + idx);
    193             mSparseCharSet.insert(it + idx, new_item);
    194             idx--;
    195             negate_class_helper(idx, item.hi_codepoint + 1);
    196         }
    197         else
    198         {
    199             std::vector<CharSetItem>::iterator it;
    200             it = mSparseCharSet.begin();
    201             mSparseCharSet.erase(it + idx);
    202             idx--;
    203             negate_class_helper(idx, item.hi_codepoint + 1);
    204         }
    205     }
    206 }
    207 
    208 void CC::remove1(int codepoint)
    209 {
    210     remove_range(codepoint, codepoint);
    211 }
    212 
    213 void CC::remove_range(int lo_codepoint, int hi_codepoint)
    214 {
    215     remove_range_helper(lo_codepoint, hi_codepoint, mSparseCharSet.size() - 1);
    216 }
    217 
    218 void CC::remove_range_helper(int lo_codepoint, int hi_codepoint, int idx)
    219 {
    220     if (idx != -1)
    221     {
    222         CharSetItem item = mSparseCharSet.at(idx);
    223 
    224         if (hi_codepoint < item.lo_codepoint - 1)
    225         {
    226             return;
    227         }
    228         else if (lo_codepoint > item.hi_codepoint + 1)
    229         {
    230             idx--;
    231             remove_range_helper(lo_codepoint, hi_codepoint, idx);
    232         }
    233         else if ((lo_codepoint <= item.lo_codepoint) && (hi_codepoint >= item.hi_codepoint))
    234         {
    235             std::vector<CharSetItem>::iterator it;
    236             it = mSparseCharSet.begin();
    237             mSparseCharSet.erase(it + idx);
    238             idx--;
    239             remove_range_helper(lo_codepoint, hi_codepoint, idx);
    240         }
    241         else if (lo_codepoint <= item.lo_codepoint)
    242         {
    243             CharSetItem new_item;
    244             new_item.lo_codepoint = hi_codepoint + 1;
    245             new_item.hi_codepoint = item.hi_codepoint;
    246             std::vector<CharSetItem>::iterator it;
    247             it = mSparseCharSet.begin();
    248             mSparseCharSet.erase(it + idx);
    249             mSparseCharSet.insert(it + idx, new_item);
    250         }
    251         else if (hi_codepoint >= item.hi_codepoint)
    252         {
    253             CharSetItem new_item;
    254             new_item.lo_codepoint = item.lo_codepoint;
    255             new_item.hi_codepoint = lo_codepoint - 1;
    256             std::vector<CharSetItem>::iterator it;
    257             it = mSparseCharSet.begin();
    258             mSparseCharSet.erase(it + idx);
    259             mSparseCharSet.insert(it + idx, new_item);
    260             idx--;
    261             remove_range_helper(lo_codepoint, hi_codepoint, idx);
    262         }
    263         else
    264         {
    265             CharSetItem new_item1;
    266             new_item1.lo_codepoint = hi_codepoint + 1;
    267             new_item1.hi_codepoint = item.hi_codepoint;
    268             CharSetItem new_item2;
    269             new_item2.lo_codepoint = item.lo_codepoint;
    270             new_item2.hi_codepoint = lo_codepoint - 1;
    271             std::vector<CharSetItem>::iterator it;
    272             it = mSparseCharSet.begin();
    273             mSparseCharSet.erase(it + idx);
    274             mSparseCharSet.insert(it + idx, new_item1);
    275             mSparseCharSet.insert(it + idx, new_item2);
    276         }
    277     }
    278 }
    279 
    280 void CC::gensym_name()
    281 {
    282     mId = "lex.CC" + std::to_string(msCSIidx);
    283     msCSIidx++;
    284 }
    285 
  • icGREP/icgrep-devel/icgrep/re/re_cc.h

    r4182 r4187  
    1616#include <vector>
    1717
     18typedef int CodePointType;
    1819
    19 struct CharSetItem{
    20     int lo_codepoint;
    21     int hi_codepoint;
     20struct CharSetItem{   
     21    CharSetItem() : lo_codepoint(0), hi_codepoint(0) {}
     22    CharSetItem(const CodePointType lo, const CodePointType hi) : lo_codepoint(lo), hi_codepoint(hi) {}
     23    CodePointType lo_codepoint;
     24    CodePointType hi_codepoint;
    2225};
    2326
    24 class CC : public RE
    25 {
     27typedef std::vector<CharSetItem> CharSetVector;
     28
     29class CC : public RE {
    2630public:
     31
     32    typedef CharSetVector::iterator                 iterator;
     33    typedef CharSetVector::const_iterator           const_iterator;
     34    typedef CharSetVector::size_type                size_type;
     35    typedef CharSetVector::reference                reference;
     36    typedef CharSetVector::const_reference          const_reference;
     37
     38    static const CodePointType UNICODE_MAX = 0x10FFFF;
    2739    CC();
    28     CC(int codepoint);
    29     CC(int lo_codepoint, int hi_codepoint);
    30     CC(CC* cc1, CC* cc2);
     40    CC(const CodePointType codepoint);
     41    CC(const CodePointType lo_codepoint, const CodePointType hi_codepoint);
     42    CC(const CC * cc1, const CC * cc2);
    3143    ~CC();
    32     std::vector<CharSetItem> getItems();
    33     std::string getName();
    34     std::string getId();
    35     bool is_member(int codepoint);
    36     void insert1(int codepoint);
    37     void insert_range(int lo_codepoint,int hi_codepoint);
    38     void negate_class();
    39     void remove1(int codepoint);
    40     void remove_range(int lo_codepoint,int hi_codepoint);
     44    std::string getName() const;
     45    void insert_range(const CodePointType lo_codepoint, const CodePointType hi_codepoint);
     46    void negate();
     47    void remove_range(const CodePointType lo_codepoint, const CodePointType hi_codepoint);
    4148
    42 protected:
    43     static int msCSIidx;
    44 private:
    45     static const int mUnicodeMax = 0x10FFFF;
     49    inline void insert(const CodePointType codepoint) {
     50        insert_range(codepoint, codepoint);
     51    }
    4652
    47     void gensym_name();
    48     bool is_member_helper(int codepoint, int idx);
    49     void joinCharSets(std::vector<CharSetItem> items1);
    50     void joinCharSets_helper(std::vector<CharSetItem> items1, int idx);
    51     void insert_range_helper(int lo_codepoint, int hi_codepoint, int idx);
    52     void negate_class_helper(int idx, int b);
    53     void remove_range_helper(int lo_codepoint, int hi_codepoint, int idx);
     53    inline void remove(const CodePointType codepoint) {
     54        remove_range(codepoint, codepoint);
     55    }
    5456
    55     std::vector<CharSetItem> mSparseCharSet;
    56     std::string mId;
     57    inline iterator begin() {
     58        return mSparseCharSet.begin();
     59    }
     60
     61    inline iterator end() {
     62        return mSparseCharSet.end();
     63    }
     64
     65    inline reference front() {
     66        return mSparseCharSet.front();
     67    }
     68
     69    inline reference back() {
     70        return mSparseCharSet.back();
     71    }
     72
     73    inline const_iterator cbegin() const {
     74        return mSparseCharSet.cbegin();
     75    }
     76
     77    inline const_iterator cend() const {
     78        return mSparseCharSet.cend();
     79    }
     80
     81    inline const_reference front() const {
     82        return mSparseCharSet.front();
     83    }
     84
     85    inline const_reference back() const {
     86        return mSparseCharSet.back();
     87    }
     88
     89    inline size_type size() const {
     90        return mSparseCharSet.size();
     91    }
     92
     93    inline bool empty() const {
     94        return mSparseCharSet.empty();
     95    }
     96
     97private:   
     98    void join(const CharSetVector & other);
     99    CharSetVector mSparseCharSet;
    57100};
    58101
     102inline static CC::iterator begin(CC & cc) {
     103    return cc.begin();
     104}
     105
     106inline static CC::iterator end(CC & cc) {
     107    return cc.end();
     108}
     109
     110inline static CC::const_iterator begin(const CC & cc) {
     111    return cc.cbegin();
     112}
     113
     114inline static CC::const_iterator end(const CC & cc) {
     115    return cc.cend();
     116}
     117
     118
    59119#endif // RE_CC_H
  • icGREP/icgrep-devel/icgrep/re/re_compiler.cpp

    r4182 r4187  
    1616#include "re_simplifier.h"
    1717#include "re_reducer.h"
    18 
    19 #include "printer_pablos.h"
    20 #include "printer_re.h"
    21 
    22 #include "utf8_encoder.h"
    23 
    2418#include "parsefailure.h"
    2519#include "re_parser.h"
    26 #include "cc_compiler.h"
    2720
    28 #include "pbix_compiler.h"
    29 #include "symbol_generator.h"
     21#include "../hrtime.h"
     22
     23#include "../printer_pablos.h"
     24#include "../printer_re.h"
     25
     26#include "../utf8_encoder.h"
     27#include "../cc_compiler.h"
     28#include "../pbix_compiler.h"
     29#include "../symbol_generator.h"
    3030
    3131//FOR TESTING AND AND ANALYSIS
     
    5959    }
    6060
    61 
    62 
    63 
    64 
    6561    //Print to the terminal the AST that was generated by the parser before adding the UTF encoding:
    66     //std::cout << "\nParser:\n" + Printer_RE::PrintRE(re_ast) + "\n" << std::endl;
     62    // std::cerr << "Parser:" << std::endl << Printer_RE::PrintRE(re_ast) << std::endl;
    6763
    6864    //Add the UTF encoding.
     
    7571        else
    7672        {
    77             std::cout << "Invalid encoding!" << std::endl;
     73            std::cerr << "Invalid encoding!" << std::endl;
    7874            exit(1);
    7975        }
     
    8177
    8278    //Print to the terminal the AST that was generated by the utf8 encoder.
    83     //std::cout << "\nUTF8-encoder:\n" + Printer_RE::PrintRE(re_ast) + "\n" << std::endl;
     79    // std::cerr << "UTF8-encoder:" << std::endl << Printer_RE::PrintRE(re_ast) << std::endl;
    8480
    8581    //Optimization passes to simplify the AST.
     
    9187
    9288    //Print to the terminal the AST that was generated by the simplifier.
    93     //std::cout << "\nSimplifier:\n" + Printer_RE::PrintRE(re_ast) + "\n" << std::endl;
     89    // std::cout << "\nSimplifier:\n" + Printer_RE::PrintRE(re_ast) + "\n" << std::endl;
    9490
    9591    //Map all of the unique character classes in order to reduce redundancy.
     
    9894
    9995    //Print to the terminal the AST with the reduced REs.
    100     //std::cout << "\nReducer:\n" + Printer_RE::PrintRE(re_ast) + "\n" << std::endl;
     96    // std::cerr << "Reducer:" << std::endl << Printer_RE::PrintRE(re_ast) << std::endl;
    10197
    10298    //Build our list of predefined characters.
     
    132128    std::list<PabloS*> cc_stmtsl = cc_compiler.get_compiled();
    133129    //Print to the terminal the AST that was generated by the character class compiler.
    134     //std::cout << "\n" << "(" << StatementPrinter::Print_CC_PabloStmts(cc_stmtsl) << ")" << "\n" << std::endl;
     130    // std::cerr << "CC AST:" << std::endl << StatementPrinter::Print_CC_PabloStmts(cc_stmtsl) << std::endl;
    135131
    136132    Pbix_Compiler pbix_compiler(name_map);
  • icGREP/icgrep-devel/icgrep/re/re_compiler.h

    r4182 r4187  
    88#define RE_COMPILER_H
    99
    10 #include "hrtime.h"
    11 
    12 //Regular Expressions
    1310#include "re_re.h"
    14 #include "utf_encoding.h"
    15 #include "llvm_gen.h"
     11#include "../utf_encoding.h"
     12#include "../llvm_gen.h"
    1613#include <string>
    1714
  • icGREP/icgrep-devel/icgrep/re/re_name.cpp

    r4182 r4187  
    22
    33Name::Name()
    4 {
     4: mName()
     5, mNegated(false)
     6, mType(Name::FixedLength) {
    57    mName = "";
    68    mNegated = false;
     
    911
    1012Name::Name(std::string name)
    11 {
    12     mName = name;
    13     mNegated = false;
    14     mType = Name::FixedLength;
     13: mName(name)
     14, mNegated(false)
     15, mType(Name::FixedLength) {
     16
    1517}
    1618
    17 Name::~Name(){}
     19Name::Name(const Name * name)
     20: mName(name->getName())
     21, mNegated(name->isNegated())
     22, mType(name->getType()) {
    1823
    19 void Name::setName(std::string name)
    20 {
     24}
     25
     26Name::~Name(){
     27
     28}
     29
     30void Name::setName(std::string name) {
    2131    mName = name;
    2232}
    2333
    24 std::string Name::getName() const
    25 {
     34std::string Name::getName() const {
    2635    return mName;
    2736}
    2837
    29 bool Name::isNegated() const
    30 {
     38bool Name::isNegated() const {
    3139    return mNegated;
    3240}
    3341
    34 void Name::setNegated(bool is_negated)
    35 {
     42void Name::setNegated(const bool is_negated) {
    3643    mNegated = is_negated;
    3744}
    3845
    39 void Name::setType(Name::Type type)
    40 {
     46void Name::setType(const Type type) {
    4147    mType = type;
    4248}
    4349
    44 Name::Type Name::getType() const
    45 {
     50Name::Type Name::getType() const {
    4651    return mType;
    4752}
  • icGREP/icgrep-devel/icgrep/re/re_name.h

    r4182 r4187  
    1111    typedef enum {FixedLength,Unicode,UnicodeCategory} Type;
    1212    Name();
     13    Name(const Name * name);
    1314    Name(std::string name);
    1415    void setName(std::string name);
    1516    std::string getName() const;
    16     void setNegated(bool is_negated);
     17    void setNegated(const bool is_negated);
    1718    bool isNegated() const;
    18     void setType(Type type);
     19    void setType(const Type type);
    1920    Type getType() const;
    2021    ~Name();
  • icGREP/icgrep-devel/icgrep/re/re_nullable.cpp

    r4182 r4187  
    1515*/
    1616
    17 RE * RE_Nullable::removeNullablePrefix(RE* re) {
     17RE * RE_Nullable::removeNullablePrefix(RE * re) {
    1818    if (Seq * re_seq = dynamic_cast<Seq*>(re)) {
    1919        re = removeNullableSeqPrefix(re_seq);
     
    4242    return re;
    4343}
    44 
    4544
    4645inline Seq * RE_Nullable::removeNullableSeqPrefix(const Seq * seq) {
     
    7978        else if (hasNullableSuffix(re_rep->getRE())) {
    8079            Vector seq;
    81             seq.push_back(new Rep(re_rep->getRE(), re_rep->getLB() - 1, re_rep->getLB() - 1));
     80            seq.push_back(RE_Simplifier::makeRep(re_rep->getRE(), re_rep->getLB() - 1, re_rep->getLB() - 1));
    8281            seq.push_back(removeNullableSuffix(re_rep->getRE()));
    8382            re = RE_Simplifier::makeSeq(Seq::Normal, seq);
     
    152151    bool nullable = false;
    153152    if (const Seq * seq = dynamic_cast<const Seq*>(re)) {
    154         nullable = isNullable(seq->back()) ? true : hasNullablePrefix(seq->back());
     153        nullable = isNullable(seq->back()) ? true : hasNullableSuffix(seq->back());
    155154    }
    156155    else if (const Alt * alt = dynamic_cast<const Alt*>(re)) {
  • icGREP/icgrep-devel/icgrep/re/re_parser.cpp

    r4182 r4187  
    3131}
    3232
     33template<class T>
     34inline static RE * simplify_vector(T & vec) {
     35    RE * re;
     36    if (vec->size() == 1) {
     37        re = vec->back();
     38        vec->pop_back();
     39    }
     40    else {
     41        re = vec.release();
     42    }
     43    return re;
     44}
     45
    3346RE * RE_Parser::parse_alt(const bool subexpression) {
    3447    std::unique_ptr<Alt> alt(new Alt());
     
    5366        throw ParseFailure("Cannot fully parse statement!");
    5467    }
    55 
    56     RE * re;
    57     if (alt->size() == 1) {
    58         re = alt->back();
    59         alt->pop_back();
    60     }
    61     else {
    62         re = alt.release();
    63     }
    64     return re;
     68    return simplify_vector(alt);
    6569}
    6670
     
    7882        throw NoRegularExpressionFound();
    7983    }
    80 
    81     RE * re;
    82     if (seq->size() == 1) {
    83         re = seq->back();
    84         seq->pop_back();
    85     }
    86     else {
    87         re = seq.release();
    88     }
    89     return re;
     84    return simplify_vector(seq);
    9085}
    9186
     
    139134        case '*':
    140135            ++_cursor; // skip past the '*'
    141             re = new Rep(re, 0, UNBOUNDED_REP);
     136            re = new Rep(re, 0, Rep::UNBOUNDED_REP);
    142137            break;
    143138        case '?':
     
    147142        case '+':
    148143            ++_cursor; // skip past the '+'
    149             re = new Rep(re, 1, UNBOUNDED_REP);
     144            re = new Rep(re, 1, Rep::UNBOUNDED_REP);
    150145            break;
    151146        case '{':
     
    182177        throw_incomplete_expression_error_if_end_of_stream();
    183178        if (*_cursor == '}') {
    184             rep = new Rep(re, lower_bound, UNBOUNDED_REP);
     179            rep = new Rep(re, lower_bound, Rep::UNBOUNDED_REP);
    185180        }
    186181        else {
     
    273268        }
    274269        name->setName(std::string(start, _cursor));
    275         if (isValidUnicodeCategoryName(name)) {
    276             ++_cursor;
    277             return name.release();
    278         }
     270        ++_cursor;
     271        return name.release();
    279272    }
    280273    throw ParseFailure("Incorrect Unicode character class format!");
     
    301294                // close the bracket expression.
    302295                if (start == _cursor) {
    303                     cc->insert1(']');
     296                    cc->insert(']');
    304297                    ++_cursor;
    305298                    included_closing_square_bracket = true;
     
    323316                    if ((start == _cursor) ? (*next != '-') : (*next == ']')) {
    324317                        _cursor = next;
    325                         cc->insert1('-');
     318                        cc->insert('-');
    326319                        break;
    327320                    }
     
    351344                }
    352345            }
    353             cc->insert1(low);
     346            cc->insert(low);
    354347        }
    355348    }
     
    369362    if (*_cursor == '\\') {
    370363        if (++_cursor == _end) {
    371             return false;
     364            throw ParseFailure("Unknown charset escape!");
    372365        }
    373366        switch (*_cursor) {
     
    431424
    432425inline void RE_Parser::negate_cc(std::unique_ptr<CC> & cc) {
    433     cc->negate_class();
    434     cc->remove1(10);
    435 }
    436 
    437 bool RE_Parser::isValidUnicodeCategoryName(const std::unique_ptr<Name> & name) {
    438     static const char * SET_OF_VALID_CATEGORIES[] = {
    439         "C", "Cc", "Cf", "Cn", "Co", "Cs",
    440         "L", "L&", "Lc", "Ll", "Lm", "Lo", "Lt", "Lu",
    441         "M", "Mc", "Me", "Mn",
    442         "N", "Nd", "Nl", "No",
    443         "P", "Pc", "Pd", "Pe", "Pf", "Pi", "Po", "Ps",
    444         "S", "Sc", "Sk", "Sm", "So",
    445         "Z", "Zl", "Zp", "Zs"
    446     };
    447     // NOTE: this method isn't as friendly as using an unordered_set for VALID_CATEGORIES since it requires
    448     // that the set is in ALPHABETICAL ORDER; however it ought to have less memory overhead than an
    449     // unordered_set and roughly equivalent speed.
    450     return std::binary_search(std::begin(SET_OF_VALID_CATEGORIES), std::end(SET_OF_VALID_CATEGORIES), name->getName());
     426    cc->negate();
     427    cc->remove(10);
    451428}
    452429
  • icGREP/icgrep-devel/icgrep/re/re_parser.h

    r4182 r4187  
    5252    bool parse_charset_literal(unsigned & literal);
    5353
    54     static bool isValidUnicodeCategoryName(const std::unique_ptr<Name> &name);
    55 
    5654    unsigned parse_hex();
    5755
  • icGREP/icgrep-devel/icgrep/re/re_reducer.cpp

    r4182 r4187  
    11#include "re_reducer.h"
    2 
     2#include <assert.h>
    33
    44RE* RE_Reducer::reduce(RE* re, std::map<std::string, RE*>& re_map) {
    55    RE* retVal = nullptr;
     6    assert (re);
    67    if (Alt* re_alt = dynamic_cast<Alt*>(re)) {
    78        Alt * new_alt = new Alt();
     
    4041    }
    4142    else if (Name* re_name = dynamic_cast<Name*>(re)) {
    42         Name* name = new Name(re_name->getName());
    43         name->setType(re_name->getType());
    44         name->setNegated(re_name->isNegated());   // TODO:  Hide this in the re_name module.
    45         retVal = name;
     43        retVal = new Name(re_name);
    4644    }
    4745    else if (dynamic_cast<Start*>(re)) {
  • icGREP/icgrep-devel/icgrep/re/re_rep.cpp

    r4182 r4187  
    66
    77#include "re_rep.h"
    8 
    9 Rep::Rep(RE* re, int lb, int ub)
    10 {
    11     mRE = re;
    12     mLB = lb;
    13     mUB = ub;
    14 }
    15 
    16 Rep::~Rep()
    17 {
    18     delete mRE;
    19 }
    20 
    21 RE* Rep::getRE() const
    22 {
    23     return mRE;
    24 }
    25 
    26 int Rep::getLB() const
    27 {
    28     return mLB;
    29 }
    30 
    31 void Rep::setLB(int lb)
    32 {
    33     mLB = lb;
    34 }
    35 
    36 int Rep::getUB() const
    37 {
    38     return mUB;
    39 }
    40 
    41 void Rep::setUB(int ub)
    42 {
    43     mUB = ub;
    44 }
    45 
    46 
  • icGREP/icgrep-devel/icgrep/re/re_rep.h

    r4182 r4187  
    1010#include "re_re.h"
    1111
    12 const int UNBOUNDED_REP = -1;
     12class Rep : public RE {
     13public:
    1314
    14 class Rep : public RE
    15 {
    16 public:
     15    enum { UNBOUNDED_REP = -1 };
     16
    1717    Rep(RE* re, int lb, int ub);
    1818    ~Rep();
    19     RE* getRE() const;
     19    RE * getRE() const;
     20    void setRE(RE * re = nullptr);
    2021    int getLB() const;
    2122    void setLB(int lb);
     
    2829};
    2930
     31inline Rep::Rep(RE * re, int lb, int ub)
     32: mRE(re)
     33, mLB(lb)
     34, mUB(ub)
     35{
     36
     37}
     38
     39inline Rep::~Rep() {
     40    delete mRE;
     41}
     42
     43inline RE * Rep::getRE() const {
     44    return mRE;
     45}
     46
     47inline void Rep::setRE(RE * re) {
     48    mRE = re;
     49}
     50
     51inline int Rep::getLB() const {
     52    return mLB;
     53}
     54
     55inline void Rep::setLB(int lb) {
     56    mLB = lb;
     57}
     58
     59inline int Rep::getUB() const {
     60    return mUB;
     61}
     62
     63inline void Rep::setUB(int ub) {
     64    mUB = ub;
     65}
     66
    3067#endif
  • icGREP/icgrep-devel/icgrep/re/re_simplifier.cpp

    r4182 r4187  
    1515    if (Alt * re_alt = dynamic_cast<Alt*>(re)) {
    1616        Vector simplified_alt;
    17         for (RE * re : *re_alt)
    18         {
     17        for (RE * re : *re_alt) {
    1918            simplified_alt.push_back(simplify(re));
    2019        }
     
    3332    }
    3433    else if (Name* re_name = dynamic_cast<Name*>(re)) {
    35         Name* name = new Name(re_name->getName());
    36         name->setType(re_name->getType());
    37         name->setNegated(re_name->isNegated());   // TODO:  Hide this in the re_name module.
    38         retVal = name;
     34        retVal = new Name(re_name);
    3935    }
    4036    else if (Rep* re_rep = dynamic_cast<Rep*>(re)) {
     
    8783}
    8884
     85/**
     86 * @brief makeAlt
     87 *
     88 * Build an Alt, flattening alternative subgroups, and combining character classes and
     89 * move character classes towards the end of the list to ensure that all combinations are found.
     90 *
     91 * @param list
     92 * @return simplified RE representing the Alt
     93 */
    8994RE * RE_Simplifier::makeAlt(Vector & list) {
    90 
    91     /*
    92       Build a list for Alt, flattening alternative subgroups, and combining character classes.  We
    93       move character classes towards the end of the list to ensure that all combinations are found.
    94     */
    95 
    9695    RE * re = nullptr;
    9796    if (!list.empty()) {
     
    124123
    125124        if (new_alt->size() == 1) {
     125            // if only one alternation exists, discard the Alt object itself and return the internal RE.
    126126            re = new_alt->back();
    127127            new_alt->pop_back();
    128128        }
    129129        else {
    130             re = new_alt.release();
     130            re = cse(new_alt.release());
    131131        }
    132132    }
    133 
    134133    return re;
    135134}
    136135
    137 RE * RE_Simplifier::makeRep(RE * re, const int lb2, const int ub2)
     136inline RE * RE_Simplifier::cse(Alt * alt) {
     137
     138
     139
     140
     141    return alt;
     142}
     143
     144
     145RE * RE_Simplifier::makeRep(RE * re, const int lb, const int ub)
    138146{
    139147    if (Rep* rep = dynamic_cast<Rep*>(re)) {
    140         if (((rep->getUB() == UNBOUNDED_REP) && (lb2 > 0)) ||
    141                 ((rep->getUB() == UNBOUNDED_REP) && (rep->getLB() <= 1))) {
    142             return new Rep(rep->getRE(), rep->getLB() * lb2, UNBOUNDED_REP);
     148        if (((rep->getUB() == Rep::UNBOUNDED_REP) && (lb > 0)) ||
     149                ((rep->getUB() == Rep::UNBOUNDED_REP) && (rep->getLB() <= 1))) {
     150            return new Rep(rep->getRE(), rep->getLB() * lb, Rep::UNBOUNDED_REP);
    143151        }
    144         else if ((rep->getUB() == UNBOUNDED_REP) && (lb2 == 0)) {
     152        else if ((rep->getUB() == Rep::UNBOUNDED_REP) && (lb == 0)) {
    145153            return new Rep(rep, 0, 1);
    146154        }
    147         else if ((rep->getUB() * lb2) >= (rep->getLB() * (lb2 + 1) - 1)) {
    148             return new Rep(rep->getRE(), rep->getLB() * lb2, ubCombine(rep->getUB(), ub2));
     155        else if ((rep->getUB() * lb) >= (rep->getLB() * (lb + 1) - 1)) {
     156            return new Rep(rep->getRE(), rep->getLB() * lb, ubCombine(rep->getUB(), ub));
    149157        }
    150158        else {
    151             return new Rep(rep, lb2, ub2);
     159            return new Rep(rep, lb, ub);
    152160        }
    153161    }
    154162    else {
    155         if (Seq* seq = dynamic_cast<Seq*>(re)) {
     163        if (Seq * seq = dynamic_cast<Seq*>(re)) {
    156164            if (seq->empty()) {
    157165                return seq;
     
    159167        }
    160168
    161         if ((lb2 == 0) && (ub2 == 0)) {
     169        if ((lb == 0) && (ub == 0)) {
    162170            return new Seq();
    163171        }
    164         else if ((lb2 == 1) && (ub2 == 1)) {
     172        else if ((lb == 1) && (ub == 1)) {
    165173            return re;
    166174        }
    167175        else {
    168             return new Rep(re, lb2, ub2);
     176            return new Rep(re, lb, ub);
    169177        }
    170178    }
    171179}
    172180
    173 inline int RE_Simplifier::ubCombine(const int h1, const int h2)
    174 {
    175     if ((h1 == UNBOUNDED_REP) || (h2 == UNBOUNDED_REP))
    176     {
    177         return UNBOUNDED_REP;
     181inline int RE_Simplifier::ubCombine(const int h1, const int h2) {
     182    if ((h1 == Rep::UNBOUNDED_REP) || (h2 == Rep::UNBOUNDED_REP)) {
     183        return Rep::UNBOUNDED_REP;
    178184    }
    179     else
    180     {
     185    else {
    181186        return h1 * h2;
    182187    }
  • icGREP/icgrep-devel/icgrep/re/re_simplifier.h

    r4182 r4187  
    33
    44//Regular Expressions
    5 #include "re_re.h"
    65#include "re_seq.h"
    76#include <list>
     7
     8class Alt;
    89
    910class RE_Simplifier {
     
    1112public:
    1213    static RE * makeAlt(Vector & list);
     14    static RE * cse(Alt * alt);
    1315    static RE * makeSeq(const Seq::Type type, Vector & list);
    14     static RE * makeRep(RE * re, const int lb2, const int ub2);
     16    static RE * makeRep(RE * re, const int lb, const int ub);
    1517    static RE * simplify(RE* re);
    1618private:
  • icGREP/icgrep-devel/icgrep/utf8_encoder.cpp

    r4182 r4187  
    77#include "utf8_encoder.h"
    88
     9#include "re/re_name.h"
     10#include "re/re_start.h"
     11#include "re/re_end.h"
     12#include "re/re_seq.h"
     13#include "re/re_alt.h"
     14#include "re/re_rep.h"
     15#include "re/re_simplifier.h"
     16
     17#include <assert.h>
     18#include <stdexcept>
    919
    1020RE* UTF8_Encoder::toUTF8(RE* re) {
    1121
    1222    RE* retVal = nullptr;
    13 
    1423    if (Alt* re_alt = dynamic_cast<Alt*>(re)) {
    15 
    1624        Alt * new_alt = new Alt();
    1725        for (RE * re : *re_alt) {
     
    2129    }
    2230    else if (Seq * re_seq = dynamic_cast<Seq*>(re)) {
    23 
    24         Seq * new_seq = new Seq(re_seq->getType());
    2531        //If this is a previously encoded Unicode byte sequence.
    2632        if (re_seq->getType() == Seq::Byte) {
    27             // Should we be throwing an error here? no byte sequences should exist in the code.
    28             // The parser should now convert them to UNICODE code points.
    29             for (RE * re : *re_seq) {
    30                 if (CC * cc = dynamic_cast<CC*>(re)) {
    31                     const CharSetItem & item = cc->getItems().front();
    32                     new_seq->push_back(new CC(item.lo_codepoint));
    33                 }
    34             }
    35         }
    36         else {
    37             for (RE * re : *re_seq) {
    38                 new_seq->push_back(toUTF8(re));
    39             }
     33            throw std::runtime_error("Unexpected UTF Byte Sequence given to UTF8 Encoder.");
     34        }
     35        Seq * new_seq = new Seq(Seq::Normal);
     36        for (RE * re : *re_seq) {
     37            new_seq->push_back(toUTF8(re));
    4038        }
    4139        retVal = new_seq;
     
    4745    else if (CC* re_cc = dynamic_cast<CC*>(re))
    4846    { 
    49         if (re_cc->getItems().size() == 1)
    50         {
    51             retVal = rangeToUTF8(re_cc->getItems().front());
    52         }
    53         else if (re_cc->getItems().size() > 1) {
     47        if (re_cc->size() == 1)
     48        {
     49            retVal = rangeToUTF8(re_cc->front());
     50        }
     51        else if (re_cc->size() > 1) {
    5452            RE::Vector re_list;
    55             for (auto & item : re_cc->getItems()) {
     53            for (const CharSetItem & item : *re_cc) {
    5654                re_list.push_back(rangeToUTF8(item));
    5755            }
     
    5957        }
    6058    }
    61     else if (Name* re_name = dynamic_cast<Name*>(re))
    62     {
    63         Name* name = new Name(re_name->getName());
    64         name->setType(re_name->getType());
    65         name->setNegated(re_name->isNegated());   // TODO:  Hide this in the re_name module.
    66         retVal = name;
    67     }
    68     else if (dynamic_cast<Start*>(re))
    69     {
     59    else if (Name* re_name = dynamic_cast<Name*>(re)) {
     60        retVal = new Name(re_name);
     61    }
     62    else if (dynamic_cast<Start*>(re)) {
    7063        retVal = new Start();
    7164    }
    72     else if (dynamic_cast<End*>(re))
    73     {
     65    else if (dynamic_cast<End*>(re)) {
    7466        retVal = new End();
    7567    }
     
    8173    int u8len_lo = u8len(item.lo_codepoint);
    8274    int u8len_hi = u8len(item.hi_codepoint);
    83 
    8475    if (u8len_lo < u8len_hi)
    8576    {
  • icGREP/icgrep-devel/icgrep/utf8_encoder.h

    r4182 r4187  
    99
    1010//Regular Expressions
    11 #include "re_re.h"
    12 #include "re_cc.h"
    13 #include "re_name.h"
    14 #include "re_start.h"
    15 #include "re_end.h"
    16 #include "re_seq.h"
    17 #include "re_alt.h"
    18 #include "re_rep.h"
     11#include "re/re_re.h"
     12#include "re/re_cc.h"
    1913
    20 #include "re_simplifier.h"
    2114
    2215class UTF8_Encoder
Note: See TracChangeset for help on using the changeset viewer.