Changeset 4242


Ignore:
Timestamp:
Oct 15, 2014, 12:15:44 PM (5 years ago)
Author:
nmedfort
Message:

Minor changes in preparation for adding multiplexing.

Location:
icGREP/icgrep-devel/icgrep
Files:
3 added
13 edited

Legend:

Unmodified
Added
Removed
  • icGREP/icgrep-devel/icgrep/cc/cc_compiler.cpp

    r4234 r4242  
    3131namespace cc {
    3232
    33 CC_Compiler::CC_Compiler(PabloBlock & cg, const Encoding encoding, const std::string basis_pattern, const std::string gensym_pattern)
     33CC_Compiler::CC_Compiler(PabloBlock & cg, const Encoding encoding, const std::string basis_pattern)
    3434: mCG(cg)
    3535, mBasisBit(encoding.getBits())
    3636, mEncoding(encoding)
    37 , mGenSymPattern(gensym_pattern)
    38 , mBasisPattern(basis_pattern)
    3937{
    4038    for (int i = 0; i < mEncoding.getBits(); i++) {
    41         mBasisBit[i] = mCG.createVar(mBasisPattern + std::to_string((mEncoding.getBits() - 1) - i));
    42     }
    43 }
    44 
    45 inline Var * CC_Compiler::getBasisVar(const int i) const {
    46     return mBasisBit[i];
    47 }
    48 
    49 void CC_Compiler::compile(const REMap & re_map) {
     39        mBasisBit[i] = mCG.createVar(basis_pattern + std::to_string(i));
     40    }
     41}
     42
     43void CC_Compiler::compile(const RENameMap & re_map) {
    5044    for (auto i =  re_map.cbegin(); i != re_map.cend(); ++i) {
    51         process_re(i->second);
    52     }
    53     for (auto i =  re_map.cbegin(); i != re_map.cend(); ++i) {
    54         //This is specifically for the utf8 multibyte character classes.
    55         if (Seq * seq = dyn_cast<Seq>(i->second)) {
    56             if (seq->getType() == Seq::Type::Byte) {
    57                 Assign * assignment = nullptr;
    58                 auto j = seq->begin();
    59                 while (true) {
    60                     Name * name = dyn_cast<Name>(*j);
    61                     assert (name);
    62                     CharClass * cc = mCG.createCharClass(name->getName());
    63                     PabloE * sym = assignment ? mCG.createAnd(mCG.createVar(assignment->getName()), cc) : cc;
    64                     if (++j != seq->end()) {
    65                         assignment = mCG.createAssign(mCG.ssa("marker"), mCG.createAdvance(sym));
    66                         continue;
    67                     }
    68                     mCG.createAssign(seq->getName(), sym);
    69                     break;
     45        if (const CC * cc = dyn_cast<CC>(i->second)) {
     46            // If we haven't already computed this CC, map it to the (pablo) charset statements.
     47            if (mComputedSet.insert(cc->getName()).second) {
     48                mCG.createAssign(cc->getName(), charset_expr(cc));
     49            }
     50        }
     51        else if (const Seq* seq = dyn_cast<Seq>(i->second)) {
     52            //This is specifically for the utf8 multibyte character classes.
     53            assert (seq->getType() == Seq::Type::Byte);
     54            Assign * assignment = nullptr;
     55            auto j = seq->begin();
     56            while (true) {
     57                Name * name = dyn_cast<Name>(*j);
     58                assert (name);
     59                CharClass * cc = mCG.createCharClass(name->getName());
     60                PabloE * sym = assignment ? mCG.createAnd(mCG.createVar(assignment->getName()), cc) : cc;
     61                if (++j != seq->end()) {
     62                    assignment = mCG.createAssign(mCG.ssa("marker"), mCG.createAdvance(sym));
     63                    continue;
    7064                }
    71             }
    72         }
    73     }
    74 }
    75 
    76 void CC_Compiler::process_re(const RE * re) {
    77     if (const Alt * alt = dyn_cast<const Alt>(re)) {
    78         for (const RE * re : *alt) {
    79             process_re(re);
    80         }
    81     }
    82     else if (const CC * cc = dyn_cast<const CC>(re)) {
    83         process(cc);
    84     }
    85     else if (const Rep* re_rep = dyn_cast<const Rep>(re)) {
    86         process_re(re_rep->getRE());
    87     }
    88     else if (const Seq* re_seq = dyn_cast<const Seq>(re)) {
    89         for (const RE * re : *re_seq) {
    90             process_re(re);
    91         }
    92     }
    93 }
    94 
    95 inline void CC_Compiler::process(const CC * cc) {
    96     if (mComputedSet.insert(cc->getName()).second) {
    97         // Add the new mapping to the list of pablo statements:
    98         mCG.createAssign(cc->getName(), charset_expr(cc));
    99     }
    100 }
    101 
    102 PabloE * CC_Compiler::bit_pattern_expr(const unsigned pattern, unsigned selected_bits)
    103 {
    104     if (selected_bits == 0) {
    105         return mCG.createAll(1);
    106     }
    107 
    108     std::vector<PabloE*> bit_terms;
    109     unsigned i = 0;
    110 
    111     while (selected_bits)
    112     {
    113         unsigned test_bit = 1 << i;
    114         if (selected_bits & test_bit)
    115         {
    116             if ((pattern & test_bit) == 0)
    117             {
    118                 bit_terms.push_back(mCG.createNot(getBasisVar(i)));
    119             }
    120             else
    121             {
    122                 bit_terms.push_back(getBasisVar(i));
    123             }
    124         }
    125         else
    126         {
    127             bit_terms.push_back(mCG.createAll(1));
    128         }
    129         selected_bits &= ~test_bit;
    130         i++;
    131     }
    132 
    133     //Reduce the list so that all of the expressions are contained within a single expression.
    134     while (bit_terms.size() > 1)
    135     {
    136         std::vector<PabloE*> new_terms;
    137         for (unsigned long i = 0; i < (bit_terms.size()/2); i++)
    138         {
    139             new_terms.push_back(mCG.createAnd(bit_terms[(2 * i) + 1], bit_terms[2 * i]));
    140         }
    141         if (bit_terms.size() % 2 == 1)
    142         {
    143             new_terms.push_back(bit_terms[bit_terms.size() -1]);
    144         }
    145         bit_terms.assign(new_terms.begin(), new_terms.end());
    146     }
    147     return bit_terms[0];
    148 }
    149 
    150 PabloE * CC_Compiler::char_test_expr(const CodePointType ch)
    151 {
    152     return bit_pattern_expr(ch, mEncoding.getMask());
    153 }
    154 
    155 PabloE * CC_Compiler::make_range(const CodePointType n1, const CodePointType n2)
    156 {
    157     CodePointType diff_count = 0;
    158 
    159     for (CodePointType diff_bits = n1 ^ n2; diff_bits; diff_count++, diff_bits >>= 1);
    160 
    161     if ((n2 < n1) || (diff_count > mEncoding.getBits()))
    162     {
    163         throw std::runtime_error(std::string("Bad Range: [") + std::to_string(n1) + "," + std::to_string(n2) + "]");
    164     }
    165 
    166     const CodePointType mask0 = (static_cast<CodePointType>(1) << diff_count) - 1;
    167 
    168     PabloE * common = bit_pattern_expr(n1 & ~mask0, mEncoding.getMask() ^ mask0);
    169 
    170     if (diff_count == 0) return common;
    171 
    172     const CodePointType mask1 = (static_cast<CodePointType>(1) << (diff_count - 1)) - 1;
    173 
    174     PabloE* lo_test = GE_Range(diff_count - 1, n1 & mask1);
    175     PabloE* hi_test = LE_Range(diff_count - 1, n2 & mask1);
    176 
    177     return mCG.createAnd(common, mCG.createSel(getBasisVar(diff_count - 1), hi_test, lo_test));
    178 }
    179 
    180 PabloE * CC_Compiler::GE_Range(const unsigned N, const unsigned n) {
    181     if (N == 0)
    182     {
    183         return mCG.createAll(1); //Return a true literal.
    184     }
    185     else if (((N % 2) == 0) && ((n >> (N - 2)) == 0))
    186     {
    187         return mCG.createOr(mCG.createOr(getBasisVar(N - 1), getBasisVar(N - 2)), GE_Range(N - 2, n));
    188     }
    189     else if (((N % 2) == 0) && ((n >> (N - 2)) == 3))
    190     {
    191         return mCG.createAnd(mCG.createAnd(getBasisVar(N - 1), getBasisVar(N - 2)), GE_Range(N - 2, n - (3 << (N - 2))));
    192     }
    193     else if (N >= 1)
    194     {
    195         int hi_bit = n & (1 << (N - 1));
    196         int lo_bits = n - hi_bit;
    197         PabloE * lo_range = GE_Range(N - 1, lo_bits);
    198         if (hi_bit == 0)
    199         {
    200             /*
    201               If the hi_bit of n is not set, then whenever the corresponding bit
    202               is set in the target, the target will certaily be >=.  Oterwise,
    203               the value of GE_range(N-1), lo_range) is required.
    204             */
    205             return mCG.createOr(getBasisVar(N - 1), lo_range);
    206         }
    207         else
    208         {
    209             /*
    210               If the hi_bit of n is set, then the corresponding bit must be set
    211               in the target for >= and GE_range(N-1, lo_bits) must also be true.
    212             */
    213             return mCG.createAnd(getBasisVar(N - 1), lo_range);
    214         }
    215     }
    216     throw std::runtime_error("Unexpected input given to ge_range: " + std::to_string(N) + ", " + std::to_string(n));
    217 }
    218 
    219 PabloE * CC_Compiler::LE_Range(const unsigned N, const unsigned n)
    220 {
    221     /*
    222       If an N-bit pattern is all ones, then it is always true that any n-bit value is LE this pattern.
    223       Handling this as a special case avoids an overflow issue with n+1 requiring more than N bits.
    224     */
    225     if ((n + 1) == (1 << N)) {
    226         return mCG.createAll(1); //True.
    227     }
    228     else {
    229         return mCG.createNot(GE_Range(N, n + 1));
     65                mCG.createAssign(seq->getName(), sym);
     66                break;
     67            }
     68        }
    23069    }
    23170}
     
    276115}
    277116
     117PabloE * CC_Compiler::bit_pattern_expr(const unsigned pattern, unsigned selected_bits)
     118{
     119    if (selected_bits == 0) {
     120        return mCG.createAll(1);
     121    }
     122
     123    std::vector<PabloE*> bit_terms;
     124    unsigned i = 0;
     125
     126    while (selected_bits)
     127    {
     128        unsigned test_bit = 1 << i;
     129        if (selected_bits & test_bit)
     130        {
     131            if ((pattern & test_bit) == 0)
     132            {
     133                bit_terms.push_back(mCG.createNot(getBasisVar(i)));
     134            }
     135            else
     136            {
     137                bit_terms.push_back(getBasisVar(i));
     138            }
     139        }
     140        else
     141        {
     142            bit_terms.push_back(mCG.createAll(1));
     143        }
     144        selected_bits &= ~test_bit;
     145        i++;
     146    }
     147
     148    //Reduce the list so that all of the expressions are contained within a single expression.
     149    while (bit_terms.size() > 1)
     150    {
     151        std::vector<PabloE*> new_terms;
     152        for (unsigned long i = 0; i < (bit_terms.size()/2); i++)
     153        {
     154            new_terms.push_back(mCG.createAnd(bit_terms[(2 * i) + 1], bit_terms[2 * i]));
     155        }
     156        if (bit_terms.size() % 2 == 1)
     157        {
     158            new_terms.push_back(bit_terms[bit_terms.size() -1]);
     159        }
     160        bit_terms.assign(new_terms.begin(), new_terms.end());
     161    }
     162    return bit_terms[0];
     163}
     164
     165PabloE * CC_Compiler::char_test_expr(const CodePointType ch)
     166{
     167    return bit_pattern_expr(ch, mEncoding.getMask());
     168}
     169
     170PabloE * CC_Compiler::make_range(const CodePointType n1, const CodePointType n2)
     171{
     172    CodePointType diff_count = 0;
     173
     174    for (CodePointType diff_bits = n1 ^ n2; diff_bits; diff_count++, diff_bits >>= 1);
     175
     176    if ((n2 < n1) || (diff_count > mEncoding.getBits()))
     177    {
     178        throw std::runtime_error(std::string("Bad Range: [") + std::to_string(n1) + "," + std::to_string(n2) + "]");
     179    }
     180
     181    const CodePointType mask0 = (static_cast<CodePointType>(1) << diff_count) - 1;
     182
     183    PabloE * common = bit_pattern_expr(n1 & ~mask0, mEncoding.getMask() ^ mask0);
     184
     185    if (diff_count == 0) return common;
     186
     187    const CodePointType mask1 = (static_cast<CodePointType>(1) << (diff_count - 1)) - 1;
     188
     189    PabloE* lo_test = GE_Range(diff_count - 1, n1 & mask1);
     190    PabloE* hi_test = LE_Range(diff_count - 1, n2 & mask1);
     191
     192    return mCG.createAnd(common, mCG.createSel(getBasisVar(diff_count - 1), hi_test, lo_test));
     193}
     194
     195PabloE * CC_Compiler::GE_Range(const unsigned N, const unsigned n) {
     196    if (N == 0)
     197    {
     198        return mCG.createAll(1); //Return a true literal.
     199    }
     200    else if (((N % 2) == 0) && ((n >> (N - 2)) == 0))
     201    {
     202        return mCG.createOr(mCG.createOr(getBasisVar(N - 1), getBasisVar(N - 2)), GE_Range(N - 2, n));
     203    }
     204    else if (((N % 2) == 0) && ((n >> (N - 2)) == 3))
     205    {
     206        return mCG.createAnd(mCG.createAnd(getBasisVar(N - 1), getBasisVar(N - 2)), GE_Range(N - 2, n - (3 << (N - 2))));
     207    }
     208    else if (N >= 1)
     209    {
     210        int hi_bit = n & (1 << (N - 1));
     211        int lo_bits = n - hi_bit;
     212        PabloE * lo_range = GE_Range(N - 1, lo_bits);
     213        if (hi_bit == 0)
     214        {
     215            /*
     216              If the hi_bit of n is not set, then whenever the corresponding bit
     217              is set in the target, the target will certaily be >=.  Oterwise,
     218              the value of GE_range(N-1), lo_range) is required.
     219            */
     220            return mCG.createOr(getBasisVar(N - 1), lo_range);
     221        }
     222        else
     223        {
     224            /*
     225              If the hi_bit of n is set, then the corresponding bit must be set
     226              in the target for >= and GE_range(N-1, lo_bits) must also be true.
     227            */
     228            return mCG.createAnd(getBasisVar(N - 1), lo_range);
     229        }
     230    }
     231    throw std::runtime_error("Unexpected input given to ge_range: " + std::to_string(N) + ", " + std::to_string(n));
     232}
     233
     234PabloE * CC_Compiler::LE_Range(const unsigned N, const unsigned n)
     235{
     236    /*
     237      If an N-bit pattern is all ones, then it is always true that any n-bit value is LE this pattern.
     238      Handling this as a special case avoids an overflow issue with n+1 requiring more than N bits.
     239    */
     240    if ((n + 1) == (1 << N)) {
     241        return mCG.createAll(1); //True.
     242    }
     243    else {
     244        return mCG.createNot(GE_Range(N, n + 1));
     245    }
     246}
     247
    278248inline PabloE * CC_Compiler::char_or_range_expr(const CodePointType lo, const CodePointType hi) {
    279249    if (lo == hi) {
     
    286256}
    287257
     258inline Var * CC_Compiler::getBasisVar(const int i) const {
     259    return mBasisBit[(mEncoding.getBits() - 1) - i];
     260}
     261
    288262} // end of namespace cc
  • icGREP/icgrep-devel/icgrep/cc/cc_compiler.h

    r4234 r4242  
    1515#include <string>
    1616#include <set>
     17#include <re/re_reducer.h>
    1718
    1819namespace cc {
    1920
    2021class CC_Compiler{
    21     typedef std::map<std::string, re::RE *> REMap;
    2222    typedef std::set<std::string>           ComputedSet;
    2323public:
    2424
    25     CC_Compiler(pablo::PabloBlock & cg, const Encoding encoding, const std::string basis_pattern = "basis", const std::string gensym_pattern = "temp");
     25    typedef std::vector<pablo::Var *>       BasisBitVars;
    2626
    27     void compile(const REMap & re_map);
     27    CC_Compiler(pablo::PabloBlock & cg, const Encoding encoding, const std::string basis_pattern = "basis");
    2828
    29     const std::string getBasisPattern() const {
    30         return mBasisPattern;
     29    void compile(const re::RENameMap & re_map);
     30
     31    const BasisBitVars & getBasisBitVars() const {
     32        return mBasisBit;
    3133    }
    3234
    3335private:
    34     void process_re(const re::RE *re);
    3536    pablo::Var * getBasisVar(const int n) const;
    3637    pablo::PabloE * bit_pattern_expr(const unsigned pattern, unsigned selected_bits);
     
    4142    pablo::PabloE * char_or_range_expr(const re::CodePointType lo, const re::CodePointType hi);
    4243    pablo::PabloE * charset_expr(const re::CC *cc);
    43     void process(const re::CC *cc);
    4444
    45     pablo::PabloBlock &                                 mCG;
    46     std::vector<pablo::Var *>                           mBasisBit;
    47     const Encoding                                      mEncoding;
    48     const std::string                                   mGenSymPattern;
    49     const std::string                                   mBasisPattern;
    50     ComputedSet                                         mComputedSet;
     45    pablo::PabloBlock &         mCG;
     46    BasisBitVars                mBasisBit;
     47    const Encoding              mEncoding;
     48    ComputedSet                 mComputedSet;
    5149};
    5250
  • icGREP/icgrep-devel/icgrep/compiler.cpp

    r4237 r4242  
    4242namespace icgrep {
    4343
    44 LLVM_Gen_RetVal compile(bool show_compile_time, Encoding encoding, std::string input_string) {
     44LLVM_Gen_RetVal compile(const Encoding encoding, const std::string input_string, const bool show_compile_time) {
    4545    RE * re_ast = nullptr;
    4646    try
    4747    {
    48         re_ast = RE_Parser::parse_re(input_string);
     48        re_ast = RE_Parser::parse(input_string);
    4949    }
    5050    catch (ParseFailure failure)
     
    8989
    9090    //Map all of the unique character classes in order to reduce redundancy.
    91     std::map<std::string, RE*> re_map;
     91    RENameMap re_map;
    9292    re_ast = RE_Reducer::reduce(re_ast, re_map);
    9393
     
    137137    #endif
    138138
    139     RE_Compiler pbix_compiler(main, name_map);
    140     pbix_compiler.compile(re_ast);
     139    RE_Compiler re_compiler(main, name_map);
     140    re_compiler.compile(re_ast);
    141141    #ifdef DEBUG_PRINT_PBIX_AST
    142142    //Print to the terminal the AST that was generated by the pararallel bit-stream compiler.
     
    144144    #endif
    145145
    146     PabloCompiler irgen(name_map, cc_compiler.getBasisPattern(), encoding.getBits());
     146    PabloCompiler pablo_compiler(name_map, cc_compiler.getBasisBitVars(), encoding.getBits());
    147147    unsigned long long cycles = 0;
    148148    double timer = 0;
     
    153153    }
    154154
    155     LLVM_Gen_RetVal retVal = irgen.compile(main);
     155    LLVM_Gen_RetVal retVal = pablo_compiler.compile(main);
    156156    if (show_compile_time)
    157157    {
  • icGREP/icgrep-devel/icgrep/compiler.h

    r4237 r4242  
    1414namespace icgrep {
    1515
    16 pablo::LLVM_Gen_RetVal compile(bool show_compile_time, Encoding encoding, std::string input_string);
     16pablo::LLVM_Gen_RetVal compile(const Encoding encoding, const std::string input_string, const bool show_compile_time = false);
    1717
    1818}
  • icGREP/icgrep-devel/icgrep/icgrep.cpp

    r4237 r4242  
    231231        timer = getElapsedTime();
    232232    }
    233     const auto llvm_codegen = icgrep::compile(compile_time_option, encoding, (regex_from_file_option ? fileregex : inregex));
     233    const auto llvm_codegen = icgrep::compile(encoding, (regex_from_file_option ? fileregex : inregex), compile_time_option);
    234234
    235235    if (compile_time_option)
     
    319319    struct Basis_bits basis_bits;
    320320    struct Output output;
    321     BitBlock match_vector = simd<1>::constant<0>();
    322     BitBlock * carry_q = new BitBlock[carry_count];
     321    BitBlock match_vector;
     322    BitBlock carry_q[carry_count];
    323323    int match_count=0;
    324324    int blk = 0;
     
    336336    ScannerT match_scanner;
    337337
     338    match_vector = simd<1>::constant<0>();
    338339    memset (carry_q, 0, sizeof(BitBlock) * carry_count);
    339340
     
    524525
    525526    buffer_pos += chars_avail;
    526     delete carry_q;
    527527}
    528528
  • icGREP/icgrep-devel/icgrep/pablo/pablo_compiler.cpp

    r4240 r4242  
    7272namespace pablo {
    7373
    74 PabloCompiler::PabloCompiler(std::map<std::string, std::string> name_map, std::string basis_pattern, int bits)
     74PabloCompiler::PabloCompiler(std::map<std::string, std::string> name_map, const BasisBitVars & basisBitVars, int bits)
    7575: mBits(bits)
    7676, m_name_map(name_map)
    77 , mBasisBitPattern(basis_pattern)
     77, mBasisBitVars(basisBitVars)
    7878, mMod(new Module("icgrep", getGlobalContext()))
    7979, mBasicBlock(nullptr)
     
    9797    InitializeNativeTarget();
    9898    std::string ErrStr;
    99 
    10099    mExecutionEngine = EngineBuilder(mMod).setUseMCJIT(true).setErrorStr(&ErrStr).setOptLevel(CodeGenOpt::Level::Less).create();
    101100    if (mExecutionEngine == nullptr) {
    102         throw std::runtime_error("\nCould not create ExecutionEngine: " + ErrStr);
     101        throw std::runtime_error("Could not create ExecutionEngine: " + ErrStr);
    103102    }
    104103
     
    170169        Value* basisBit = b.CreateLoad(mBasisBitsAddr);
    171170        Value* indices[] = {b.getInt64(0), b.getInt32(i)};
    172         const std::string name = mBasisBitPattern + std::to_string(i);
     171        const std::string name = mBasisBitVars[i]->getName();
    173172        Value* basis_bits_struct_GEP = b.CreateGEP(basisBit, indices, name);
    174173        mMarkerMap.insert(make_pair(name, basis_bits_struct_GEP));
  • icGREP/icgrep-devel/icgrep/pablo/pablo_compiler.h

    r4240 r4242  
    1414#include <pablo/codegenstate.h>
    1515#include <pablo/pe_pabloe.h>
     16#include <cc/cc_compiler.h>
    1617#include "unicode_categories.h"
    1718#include <iostream>
     
    8384    #endif
    8485public:
    85     PabloCompiler(std::map<std::string, std::string> name_map, std::string basis_pattern, int bits);
     86    typedef cc::CC_Compiler::BasisBitVars BasisBitVars;
     87    PabloCompiler(std::map<std::string, std::string> name_map, const BasisBitVars & basisBitVars, int bits);
    8688    ~PabloCompiler();
    8789    LLVM_Gen_RetVal compile(const PabloBlock & cg_state);
     
    120122    int                                 mBits;
    121123    std::map<std::string, std::string>  m_name_map;
    122     std::string                         mBasisBitPattern;
     124    const BasisBitVars &                mBasisBitVars;
    123125
    124126    Module* const                       mMod;
  • icGREP/icgrep-devel/icgrep/re/re_parser.cpp

    r4234 r4242  
    1616namespace re {
    1717
    18 RE * RE_Parser::parse_re(const std::string & regular_expression, const bool allow_escapes_within_charset) {
     18RE * RE_Parser::parse(const std::string & regular_expression, const bool allow_escapes_within_charset) {
    1919    RE_Parser parser(regular_expression, allow_escapes_within_charset);
    2020    RE * re = parser.parse_alt(false);
  • icGREP/icgrep-devel/icgrep/re/re_parser.h

    r4194 r4242  
    2222public:
    2323
    24     static RE * parse_re(const std::string &intput_string, const bool allow_escapes_within_charset = false);
     24    static RE * parse(const std::string &intput_string, const bool allow_escapes_within_charset = false);
    2525
    2626private:
  • icGREP/icgrep-devel/icgrep/re/re_reducer.cpp

    r4203 r4242  
    1111namespace re {
    1212
    13 RE * RE_Reducer::reduce(RE * re, std::map<std::string, RE*>& re_map) {
     13RE * RE_Reducer::reduce(RE * re, RENameMap & re_map) {
    1414    assert (re);
    1515    if (Alt * alt = dyn_cast<Alt>(re)) {
     
    2424        if (seq->getType() == Seq::Type::Byte) {
    2525            //If this is a sequence of byte classes then this is a multibyte sequence for a Unicode character class.
    26             std::string seqname = seq->getName();
    27             re_map.insert(make_pair(seqname, seq));
     26            const std::string seqname = seq->getName();
     27            re_map.insert(std::make_pair(seqname, seq));
    2828            re = makeName(seqname, false, Name::Type::Unicode);
    2929        }
     
    3333    }
    3434    else if (CC * cc = dyn_cast<CC>(re)) {
    35         std::string ccname = cc->getName();
     35        const std::string ccname = cc->getName();
    3636        //If the character class isn't in the map then add it.
    37         re_map.insert(make_pair(ccname, cc));
     37        re_map.insert(std::make_pair(ccname, cc));
    3838        //return a new name class with the name of the character class.
    3939        re = makeName(ccname);
  • icGREP/icgrep-devel/icgrep/re/re_reducer.h

    r4194 r4242  
    1010namespace re {
    1111
     12typedef std::map<std::string, RE*> RENameMap;
     13
    1214class RE_Reducer
    1315{
    1416public:
    15     static RE* reduce(RE* re, std::map<std::string, RE*>& re_map);
     17    static RE* reduce(RE* re, RENameMap & re_map);
    1618};
    1719
  • icGREP/icgrep-devel/icgrep/utf8_encoder.cpp

    r4203 r4242  
    5555
    5656RE * UTF8_Encoder::rangeToUTF8(const CharSetItem & item) {
    57     int u8len_lo = u8len(item.lo_codepoint);
    58     int u8len_hi = u8len(item.hi_codepoint);
     57    int u8len_lo = lenUTF8(item.lo_codepoint);
     58    int u8len_hi = lenUTF8(item.hi_codepoint);
    5959    if (u8len_lo < u8len_hi) {
    60         int m = max_of_u8len(u8len_lo);
     60        int m = maxUTF8Len(u8len_lo);
    6161        return makeAlt({rangeToUTF8(CharSetItem(item.lo_codepoint, m)), rangeToUTF8(CharSetItem(m + 1, item.hi_codepoint))});
    6262    }
     
    7777    else if (hbyte == lbyte)
    7878    {
    79         Seq* seq = makeSeq(u8Prefix(hbyte) ? Seq::Type::Byte : Seq::Type::Normal);
     79        Seq* seq = makeSeq(isUTF8Prefix(hbyte) ? Seq::Type::Byte : Seq::Type::Normal);
    8080        seq->push_back(makeByteClass(hbyte));
    8181        seq->push_back(rangeToUTF8_helper(lo, hi, n+1, hlen));
     
    103103        {
    104104            Seq* seq = makeSeq();
    105             seq->setType((u8Prefix(hbyte) ? Seq::Type::Byte : Seq::Type::Normal));
     105            seq->setType((isUTF8Prefix(hbyte) ? Seq::Type::Byte : Seq::Type::Normal));
    106106            seq->push_back(makeByteRange(lbyte, hbyte));
    107107            seq->push_back(rangeToUTF8_helper(lo, hi, n + 1, hlen));
     
    111111}
    112112
    113 bool UTF8_Encoder::u8Prefix(int cp)
    114 {
     113inline bool UTF8_Encoder::isUTF8Prefix(const int cp) {
    115114    return ((cp >= 0xC2) && (cp <= 0xF4));
    116115}
     
    126125}
    127126
    128 int UTF8_Encoder::u8byte(int codepoint, int n)
     127inline int UTF8_Encoder::u8byte(int codepoint, int n)
    129128{
    130129    int retVal = 0;
    131130
    132     int len = u8len(codepoint);
     131    int len = lenUTF8(codepoint);
    133132
    134133    if (n == 1)
     
    159158}
    160159
    161 int UTF8_Encoder::u8len(int cp)
     160inline int UTF8_Encoder::lenUTF8(const int cp)
    162161{
    163162    if (cp <= 0x7F)
     
    179178}
    180179
    181 int UTF8_Encoder::max_of_u8len(int lgth)
     180inline int UTF8_Encoder::maxUTF8Len(int lgth)
    182181{
    183182    if (lgth == 1)
  • icGREP/icgrep-devel/icgrep/utf8_encoder.h

    r4194 r4242  
    2323    static re::CC* makeByteRange(int lo, int hi);
    2424
    25     static bool u8Prefix(int cp);
    26     static int u8len(int cp);
    27     static int max_of_u8len(int lgth);
     25    static bool isUTF8Prefix(const int cp);
     26    static int lenUTF8(const int cp);
     27    static int maxUTF8Len(int lgth);
    2828    static int u8byte(int codepoint, int n);
    2929};
Note: See TracChangeset for help on using the changeset viewer.