Changeset 4612


Ignore:
Timestamp:
Jun 21, 2015, 4:38:51 PM (4 years ago)
Author:
nmedfort
Message:

Multiplexing bug fix and some CC changes.

Location:
icGREP/icgrep-devel/icgrep
Files:
12 edited

Legend:

Unmodified
Added
Removed
  • icGREP/icgrep-devel/icgrep/UCD/CaseFolding_txt.cpp

    r4318 r4612  
    1010
    1111int findFoldEntry(codepoint_t cp) {
    12     int lo = 0; 
     12    int lo = 0;
    1313    int hi = foldTableSize;
    1414    while (hi - lo > 1) {
    15       int mid = (lo + hi)/2;
    16       if (cp < foldTable[mid].range_lo) {
    17         hi = mid;
    18       }
    19       else {
    20         lo = mid;
    21       }
     15        int mid = (lo + hi)/2;
     16        if (cp < foldTable[mid].range_lo) {
     17            hi = mid;
     18        }
     19        else {
     20            lo = mid;
     21        }
    2222    }
    2323    return lo;
     
    2828}
    2929
    30 void caseInsensitiveInsertRange(re::CC * base_cc, codepoint_t lo, codepoint_t hi) {
    31     base_cc->insert_range(lo, hi);
     30void caseInsensitiveInsertRange(re::CC * cc, codepoint_t lo, codepoint_t hi) {
     31    cc->insert_range(lo, hi);
    3232    // Find the first foldTable entry overlapping the (lo, hi) range.
    3333    int e = findFoldEntry(lo);
     
    3535    // Keep processing until we are done.
    3636    while (foldTable[e].range_lo <= hi) {
    37       const FoldEntry & fe = foldTable[e];
    38       const FoldEntry & fnext = foldTable[e + 1];
    39       // Constrain (lo, hi) to this entry only.
    40       codepoint_t lo1 = std::max(lo, fe.range_lo);
    41       codepoint_t hi1 = std::min(hi, fnext.range_lo - 1);
    42       if (fe.fold_offset > 0 && fe.range_lo + fe.fold_offset < fnext.range_lo) {
    43         //
    44         // There are more than fold_offset values in the range, meaning that
    45         // we have an extended range with alternating subranges of positive
    46         // and negative offsets. 
    47         // First find the negative offset subrange.
    48         codepoint_t subrange_lo = lo1 - ((lo1 - fe.range_lo) % (2 * fe.fold_offset));
    49         codepoint_t negative_subrange_lo = subrange_lo + fe.fold_offset;
    50         codepoint_t negative_subrange_hi = subrange_lo + 2 * fe.fold_offset - 1;
    51         if ((lo1 <= negative_subrange_hi) && (hi1 >= negative_subrange_lo)) {
    52            // negative offsets apply
    53            base_cc -> insert_range(std::max(negative_subrange_lo,lo1) - fe.fold_offset, std::min(negative_subrange_hi, hi1) - fe.fold_offset);
    54         }
    55         // Now the positive offset subrange.
    56         codepoint_t positive_subrange_lo = hi1 - ((hi1 - fe.range_lo) % (2 * fe.fold_offset));
    57         codepoint_t positive_subrange_hi = positive_subrange_lo + fe.fold_offset - 1;
    58         if ((lo1 <= positive_subrange_hi) && (hi1 >= positive_subrange_lo)) {
    59            base_cc -> insert_range(std::max(positive_subrange_lo, lo1) + fe.fold_offset, std::min(positive_subrange_hi, hi1) + fe.fold_offset);
    60         }
    61       }
    62       else if (fe.fold_offset != 0) {
    63         // We have either a positive or negative offset, and all offsets for
    64         // this entry have the same sign.
    65         base_cc -> insert_range(lo1 + fe.fold_offset, hi1 + fe.fold_offset);
    66       }
    67       // Now pick up any individual fold entries.
    68       for (int i = 0; i < fe.fold_pairs.size(); i++) {
    69         if (fe.fold_pairs[i].first < lo) continue;  // Only possible for first fold_entry.
    70         if (fe.fold_pairs[i].first > hi) break;     // Only possible for last fold_entry.
    71         base_cc->insert(fe.fold_pairs[i].second);
    72       }
    73       // Move on to the next fold_entry.
    74       e++;
     37        const FoldEntry & fe = foldTable[e];
     38        const FoldEntry & fnext = foldTable[e + 1];
     39        // Constrain (lo, hi) to this entry only.
     40        codepoint_t lo1 = std::max(lo, fe.range_lo);
     41        codepoint_t hi1 = std::min(hi, fnext.range_lo - 1);
     42        if (fe.fold_offset > 0 && fe.range_lo + fe.fold_offset < fnext.range_lo) {
     43            //
     44            // There are more than fold_offset values in the range, meaning that
     45            // we have an extended range with alternating subranges of positive
     46            // and negative offsets.
     47            // First find the negative offset subrange.
     48            codepoint_t subrange_lo = lo1 - ((lo1 - fe.range_lo) % (2 * fe.fold_offset));
     49            codepoint_t negative_subrange_lo = subrange_lo + fe.fold_offset;
     50            codepoint_t negative_subrange_hi = subrange_lo + 2 * fe.fold_offset - 1;
     51            if ((lo1 <= negative_subrange_hi) && (hi1 >= negative_subrange_lo)) {
     52                // negative offsets apply
     53                cc->insert_range(std::max(negative_subrange_lo,lo1) - fe.fold_offset, std::min(negative_subrange_hi, hi1) - fe.fold_offset);
     54            }
     55            // Now the positive offset subrange.
     56            codepoint_t positive_subrange_lo = hi1 - ((hi1 - fe.range_lo) % (2 * fe.fold_offset));
     57            codepoint_t positive_subrange_hi = positive_subrange_lo + fe.fold_offset - 1;
     58            if ((lo1 <= positive_subrange_hi) && (hi1 >= positive_subrange_lo)) {
     59                cc->insert_range(std::max(positive_subrange_lo, lo1) + fe.fold_offset, std::min(positive_subrange_hi, hi1) + fe.fold_offset);
     60            }
     61        }
     62        else if (fe.fold_offset != 0) {
     63            // We have either a positive or negative offset, and all offsets for
     64            // this entry have the same sign.
     65            cc->insert_range(lo1 + fe.fold_offset, hi1 + fe.fold_offset);
     66        }
     67        // Now pick up any individual fold entries.
     68        for (int i = 0; i < fe.fold_pairs.size(); i++) {
     69            if (fe.fold_pairs[i].first < lo) continue;  // Only possible for first fold_entry.
     70            if (fe.fold_pairs[i].first > hi) break;     // Only possible for last fold_entry.
     71            cc->insert(fe.fold_pairs[i].second);
     72        }
     73        // Move on to the next fold_entry.
     74        e++;
    7575    }
    7676}
  • icGREP/icgrep-devel/icgrep/cc/cc_compiler.cpp

    r4602 r4612  
    2727namespace cc {
    2828
    29 CC_Compiler::CC_Compiler(PabloBlock & cg, const Encoding encoding, const std::string basis_pattern)
    30 : mCG(cg)
     29CC_Compiler::CC_Compiler(PabloBlock & entry, const Encoding encoding, const std::string basis_pattern)
     30: mBuilder(entry)
    3131, mBasisBit(encoding.getBits())
    3232, mEncoding(encoding)
    3333{
    3434    for (int i = 0; i < mEncoding.getBits(); i++) {
    35         mBasisBit[i] = mCG.createVar(basis_pattern + std::to_string(i));
    36     }
    37 }
    38 
    39 Assign * CC_Compiler::compileCC(const CC *cc) {
    40     return compileCC(cc, mCG);
    41 }
    42 
    43 Assign * CC_Compiler::compileCC(const CC *cc, PabloBlock & block) {
    44     PabloBuilder pb(block);
    45     return compileCC(cc, pb);
    46 }
    47 
    48 Assign * CC_Compiler::compileCC(const CC *cc, PabloBuilder & pb) {
    49     return pb.createAssign(cc->canonicalName(ByteClass), charset_expr(cc, pb));
    50 }
    51 
    52 std::vector<Var *> CC_Compiler::getBasisBits(const CC_NameMap & nameMap) {
    53     return mBasisBit;
     35        mBasisBit[i] = mBuilder.createVar(basis_pattern + std::to_string(i));
     36    }
     37}
     38
     39Assign * CC_Compiler::compileCC(const std::string && canonicalName, const CC *cc, PabloBlock & block) {
     40    return block.createAssign(std::move(canonicalName), charset_expr(cc, block));
     41}
     42
     43Assign * CC_Compiler::compileCC(const std::string && canonicalName, const CC *cc, PabloBuilder & builder) {
     44    return builder.createAssign(std::move(canonicalName), charset_expr(cc, builder));
    5445}
    5546
     
    8677            }
    8778            else if (name->getCompiled() == nullptr) {
    88                 name->setCompiled(compileCC(cast<CC>(def), mCG));
     79                name->setCompiled(compileCC(cast<CC>(def)));
    8980            }
    9081        }
     
    9586}
    9687
    97 
    98 
    99 PabloAST * CC_Compiler::charset_expr(const CC * cc, PabloBuilder & pb) {
     88template<typename PabloBlockOrBuilder>
     89PabloAST * CC_Compiler::charset_expr(const CC * cc, PabloBlockOrBuilder & pb) {
    10090    if (cc->empty()) {
    10191        return pb.createZeroes();
     
    120110            }
    121111            if (combine) {
    122                 CodePointType lo = cc->front().lo_codepoint;
    123                 CodePointType hi = cc->back().lo_codepoint;
    124                 const CodePointType mask = mEncoding.getMask();
     112                codepoint_t lo = cc->front().lo_codepoint;
     113                codepoint_t hi = cc->back().lo_codepoint;
     114                const codepoint_t mask = mEncoding.getMask();
    125115                lo &= (mask - 1);
    126116                hi |= (mask ^ (mask - 1));
     
    142132}
    143133
    144 PabloAST * CC_Compiler::bit_pattern_expr(const unsigned pattern, unsigned selected_bits, PabloBuilder &pb)
     134template<typename PabloBlockOrBuilder>
     135PabloAST * CC_Compiler::bit_pattern_expr(const unsigned pattern, unsigned selected_bits, PabloBlockOrBuilder &pb)
    145136{
    146137    if (selected_bits == 0) {
     
    192183}
    193184
    194 inline PabloAST * CC_Compiler::char_test_expr(const CodePointType ch, PabloBuilder &pb) {
     185template<typename PabloBlockOrBuilder>
     186inline PabloAST * CC_Compiler::char_test_expr(const codepoint_t ch, PabloBlockOrBuilder &pb) {
    195187    return bit_pattern_expr(ch, mEncoding.getMask(), pb);
    196188}
    197189
    198 PabloAST * CC_Compiler::make_range(const CodePointType n1, const CodePointType n2, PabloBuilder & pb) {
    199     CodePointType diff_count = 0;
    200 
    201     for (CodePointType diff_bits = n1 ^ n2; diff_bits; diff_count++, diff_bits >>= 1);
     190template<typename PabloBlockOrBuilder>
     191PabloAST * CC_Compiler::make_range(const codepoint_t n1, const codepoint_t n2, PabloBlockOrBuilder & pb) {
     192    codepoint_t diff_count = 0;
     193
     194    for (codepoint_t diff_bits = n1 ^ n2; diff_bits; diff_count++, diff_bits >>= 1);
    202195
    203196    if ((n2 < n1) || (diff_count > mEncoding.getBits()))
     
    206199    }
    207200
    208     const CodePointType mask0 = (static_cast<CodePointType>(1) << diff_count) - 1;
     201    const codepoint_t mask0 = (static_cast<codepoint_t>(1) << diff_count) - 1;
    209202
    210203    PabloAST * common = bit_pattern_expr(n1 & ~mask0, mEncoding.getMask() ^ mask0, pb);
     
    212205    if (diff_count == 0) return common;
    213206
    214     const CodePointType mask1 = (static_cast<CodePointType>(1) << (diff_count - 1)) - 1;
     207    const codepoint_t mask1 = (static_cast<codepoint_t>(1) << (diff_count - 1)) - 1;
    215208
    216209    PabloAST* lo_test = GE_Range(diff_count - 1, n1 & mask1, pb);
     
    220213}
    221214
    222 PabloAST * CC_Compiler::GE_Range(const unsigned N, const unsigned n, PabloBuilder &pb) {
     215template<typename PabloBlockOrBuilder>
     216PabloAST * CC_Compiler::GE_Range(const unsigned N, const unsigned n, PabloBlockOrBuilder &pb) {
    223217    if (N == 0) {
    224218        return pb.createOnes(); //Return a true literal.
     
    256250}
    257251
    258 PabloAST * CC_Compiler::LE_Range(const unsigned N, const unsigned n, PabloBuilder &pb)
     252template<typename PabloBlockOrBuilder>
     253PabloAST * CC_Compiler::LE_Range(const unsigned N, const unsigned n, PabloBlockOrBuilder &pb)
    259254{
    260255    /*
     
    270265}
    271266
    272 inline PabloAST * CC_Compiler::char_or_range_expr(const CodePointType lo, const CodePointType hi, PabloBuilder &pb) {
     267template<typename PabloBlockOrBuilder>
     268inline PabloAST * CC_Compiler::char_or_range_expr(const codepoint_t lo, const codepoint_t hi, PabloBlockOrBuilder &pb) {
    273269    if (lo == hi) {
    274270        return char_test_expr(lo, pb);
  • icGREP/icgrep-devel/icgrep/cc/cc_compiler.h

    r4602 r4612  
    99
    1010#include <re/re_cc.h>
     11#include <pablo/builder.hpp>
    1112#include "utf_encoding.h"
    1213#include <string>
    13 
    14 namespace pablo {
    15     class PabloAST;
    16     class PabloBuilder;
    17     class PabloBlock;
    18     class Var;
    19     class Assign;
    20 }
    2114
    2215namespace cc {
     
    2720public:
    2821
    29     CC_Compiler(pablo::PabloBlock & cg, const Encoding encoding, const std::string basis_pattern = "basis");
     22    using Vars = std::vector<pablo::Var *>;
    3023
    31     std::vector<pablo::Var *> getBasisBits(const CC_NameMap & nameMap);
     24    CC_Compiler(pablo::PabloBlock & entry, const Encoding encoding, const std::string basis_pattern = "basis");
     25
     26    const Vars & getBasisBits(const CC_NameMap & nameMap) const;
     27
     28    pablo::Assign * compileCC(const re::CC *cc);
    3229
    3330    pablo::Assign * compileCC(const re::CC *cc, pablo::PabloBlock & block);
    3431
    35     pablo::Assign * compileCC(const re::CC *cc, pablo::PabloBuilder & pb);
     32    pablo::Assign * compileCC(const re::CC *cc, pablo::PabloBuilder & builder);
    3633
    37     pablo::Assign * compileCC(const re::CC *cc);
     34    pablo::Assign * compileCC(const std::string && canonicalName, const re::CC *cc, pablo::PabloBlock & block);
     35
     36    pablo::Assign * compileCC(const std::string &&canonicalName, const re::CC *cc, pablo::PabloBuilder & builder);
    3837
    3938    void compileByteClasses(re::RE * re);
     
    4140private:
    4241    pablo::Var * getBasisVar(const int n) const;
    43     pablo::PabloAST * bit_pattern_expr(const unsigned pattern, unsigned selected_bits, pablo::PabloBuilder & pb);
    44     pablo::PabloAST * char_test_expr(const re::CodePointType ch, pablo::PabloBuilder & pb);
    45     pablo::PabloAST * make_range(const re::CodePointType n1, const re::CodePointType n2, pablo::PabloBuilder & pb);
    46     pablo::PabloAST * GE_Range(const unsigned N, const unsigned n, pablo::PabloBuilder & pb);
    47     pablo::PabloAST * LE_Range(const unsigned N, const unsigned n, pablo::PabloBuilder & pb);
    48     pablo::PabloAST * char_or_range_expr(const re::CodePointType lo, const re::CodePointType hi, pablo::PabloBuilder & pb);
    49     pablo::PabloAST * charset_expr(const re::CC *cc, pablo::PabloBuilder & pb);
     42    template<typename PabloBlockOrBuilder>
     43    pablo::PabloAST * bit_pattern_expr(const unsigned pattern, unsigned selected_bits, PabloBlockOrBuilder & pb);
     44    template<typename PabloBlockOrBuilder>
     45    pablo::PabloAST * char_test_expr(const re::codepoint_t ch, PabloBlockOrBuilder & pb);
     46    template<typename PabloBlockOrBuilder>
     47    pablo::PabloAST * make_range(const re::codepoint_t n1, const re::codepoint_t n2, PabloBlockOrBuilder & pb);
     48    template<typename PabloBlockOrBuilder>
     49    pablo::PabloAST * GE_Range(const unsigned N, const unsigned n, PabloBlockOrBuilder & pb);
     50    template<typename PabloBlockOrBuilder>
     51    pablo::PabloAST * LE_Range(const unsigned N, const unsigned n, PabloBlockOrBuilder & pb);
     52    template<typename PabloBlockOrBuilder>
     53    pablo::PabloAST * char_or_range_expr(const re::codepoint_t lo, const re::codepoint_t hi, PabloBlockOrBuilder & pb);
     54    template<typename PabloBlockOrBuilder>
     55    pablo::PabloAST * charset_expr(const re::CC *cc, PabloBlockOrBuilder & pb);
    5056private:   
    51     pablo::PabloBlock &         mCG;
     57    pablo::PabloBuilder         mBuilder;
    5258    std::vector<pablo::Var *>   mBasisBit;
    5359    const Encoding              mEncoding;
    5460};
     61
     62inline pablo::Assign * CC_Compiler::compileCC(const re::CC *cc) {
     63    return compileCC(cc, mBuilder);
     64}
     65
     66inline pablo::Assign * CC_Compiler::compileCC(const re::CC *cc, pablo::PabloBlock & block) {
     67    return compileCC(std::move(cc->canonicalName(re::ByteClass)), cc, block);
     68}
     69
     70inline pablo::Assign * CC_Compiler::compileCC(const re::CC *cc, pablo::PabloBuilder & builder) {
     71    return compileCC(std::move(cc->canonicalName(re::ByteClass)), cc, builder);
     72}
     73
     74inline const CC_Compiler::Vars & CC_Compiler::getBasisBits(const CC_NameMap &) const {
     75    return mBasisBit;
     76}
     77
    5578
    5679}
  • icGREP/icgrep-devel/icgrep/pablo/builder.hpp

    r4603 r4612  
    2020    }
    2121
    22     Var * createVar(const std::string name) {
    23         return createVar(mPb.getName(name));
     22    inline Var * createVar(const std::string name) {
     23        return mPb.createVar(name);
    2424    }
    2525
    26     Var * createVar(String * name);
     26    inline Var * createVar(String * const name) {
     27        return mPb.createVar(name);
     28    }
    2729
    28     PabloAST * createVar(const PabloAST * const) {
    29         throw std::runtime_error("Var objects should only refer to external Vars (i.e., input basis bit streams). Use Assign objects directly.");
     30    inline Var * createVar(PabloAST * const name) {
     31        return mPb.createVar(name);
    3032    }
    3133
     
    3638    Call * createCall(String * name);
    3739
    38     Assign * createAssign(const std::string prefix, PabloAST * expr, const int outputIndex = -1) {
    39         return mPb.createAssign(prefix, expr, outputIndex);
     40    Assign * createAssign(const std::string && prefix, PabloAST * expr, const int outputIndex = -1) {
     41        return mPb.createAssign(std::move(prefix), expr, outputIndex);
    4042    }
    4143
  • icGREP/icgrep-devel/icgrep/pablo/codegenstate.cpp

    r4611 r4612  
    2020/// UNARY CREATE FUNCTIONS
    2121
    22 Assign * PabloBlock::createAssign(const std::string prefix, PabloAST * expr, const int outputIndex)  {
     22Assign * PabloBlock::createAssign(const std::string && prefix, PabloAST * expr, const int outputIndex)  {
    2323    return insertAtInsertionPoint(new Assign(expr, outputIndex, makeName(prefix, false), this));
    2424}
  • icGREP/icgrep-devel/icgrep/pablo/codegenstate.h

    r4611 r4612  
    8383    }
    8484
    85     Assign * createAssign(const std::string prefix, PabloAST * expr, const int outputIndex = -1);
     85    Assign * createAssign(const std::string && prefix, PabloAST * expr, const int outputIndex = -1);
    8686
    8787    inline Var * createVar(const std::string name) {
  • icGREP/icgrep-devel/icgrep/pablo/optimizers/pablo_automultiplexing.cpp

    r4611 r4612  
    347347    switch (stmt->getClassTypeId()) {
    348348        case PabloAST::ClassTypeId::Assign:
    349             bdd = input[0];
    350             break;
     349            return input[0];
    351350        case PabloAST::ClassTypeId::And:
    352351            bdd = And(input[0], input[1]);
     
    354353        case PabloAST::ClassTypeId::Next:
    355354        case PabloAST::ClassTypeId::Or:
    356             bdd = Or(input[0], input[1]);
    357             break;
     355            return Or(input[0], input[1]);
    358356        case PabloAST::ClassTypeId::Xor:
    359             bdd = Xor(input[0], input[1]);
    360             break;
     357            return Xor(input[0], input[1]);
    361358        case PabloAST::ClassTypeId::Not:
    362             bdd = Not(input[0]);
    363             break;
     359            return Not(input[0]);
    364360        case PabloAST::ClassTypeId::Sel:
    365361            bdd = Ite(input[0], input[1], input[2]);
     
    371367        case PabloAST::ClassTypeId::MatchStar:
    372368            if (LLVM_UNLIKELY(isZero(input[0]) || isZero(input[1]))) {
    373                 bdd = Zero();
    374                 break;
     369                return Zero();
    375370            }
    376371        case PabloAST::ClassTypeId::Call:
    377             bdd = NewVar();
    378             break;
     372            return NewVar();
    379373        case PabloAST::ClassTypeId::Advance:
    380             bdd = characterize(cast<Advance>(stmt), input[0]);
    381             break;
     374            return characterize(cast<Advance>(stmt), input[0]);
    382375        default:
    383376            throw std::runtime_error("Unexpected statement type " + stmt->getName()->to_string());
    384377    }
    385 
    386     assert ("Failed to generate a BDD." && (bdd));
    387378
    388379    if (LLVM_UNLIKELY(noSatisfyingAssignment(bdd))) {
     
    566557
    567558/** ------------------------------------------------------------------------------------------------------------- *
     559 * @brief prohibited
     560 *
     561 * If this statement is an Assign or Next node or any of its operands is a non-superfluous Assign or Next node,
     562 * then we're prohibited from minimizing this statement.
     563 ** ------------------------------------------------------------------------------------------------------------- */
     564inline bool prohibited(const Statement * const stmt) {
     565    if (isa<Assign>(stmt) || isa<Next>(stmt)) {
     566        return true;
     567    }
     568    for (unsigned i = 0; i != stmt->getNumOperands(); ++i) {
     569        const PabloAST * const  op = stmt->getOperand(i);
     570        const Assign * const assign = dyn_cast<Assign>(op);
     571        if (LLVM_UNLIKELY((assign && !assign->superfluous()) || isa<Next>(op))) {
     572            return true;
     573        }
     574    }
     575    return false;
     576}
     577
     578/** ------------------------------------------------------------------------------------------------------------- *
    568579 * @brief minimize
    569580 ** ------------------------------------------------------------------------------------------------------------- */
     
    578589        }
    579590
    580         if (isa<Assign>(stmt) || isa<Next>(stmt)) {
     591        if (LLVM_UNLIKELY(prohibited(stmt))) {
    581592            continue;
    582593        }
  • icGREP/icgrep-devel/icgrep/re/re_cc.cpp

    r4611 r4612  
    3131    name << std::hex;
    3232    if ((type == ByteClass) && (mSparseCharSet.back().hi_codepoint >= 0x80)) {
    33       name << "BC_";
     33      name << "BC";
    3434    }
    3535    else {
    36         name << "CC_";
     36        name << "CC";
    3737    }
    3838    char separator = '_';
     
    4545            name << i.lo_codepoint << '_' << i.hi_codepoint;
    4646        }
     47        separator = ',';
    4748    }
    4849    return name.str();
    4950}
    5051
    51 CodePointType CC::max_codepoint() {
    52     return mSparseCharSet.size() == 0 ? 0 : mSparseCharSet.back().hi_codepoint;
    53 }
    54 
    55 void CC::insert_range(const CodePointType lo_codepoint, const CodePointType hi_codepoint) {
     52void CC::insert_range(const codepoint_t lo_codepoint, const codepoint_t hi_codepoint) {
    5653    CharSetItem item(lo_codepoint, hi_codepoint);
    5754    for (auto i = mSparseCharSet.begin(); i != mSparseCharSet.end(); ) {
     
    7572}
    7673
    77 void CC::remove_range(const CodePointType lo_codepoint, const CodePointType hi_codepoint) {
     74void CC::remove_range(const codepoint_t lo_codepoint, const codepoint_t hi_codepoint) {
    7875    for (auto i = mSparseCharSet.begin(); i != mSparseCharSet.end(); ) {
    7976        CharSetItem & range = *i;
     
    133130        }
    134131        isect->insert_range(std::max(ra.lo_codepoint, rb.lo_codepoint), std::min(ra.hi_codepoint, rb.hi_codepoint));
    135         if (ra.hi_codepoint < rb.hi_codepoint) ++ai; 
     132        if (ra.hi_codepoint < rb.hi_codepoint) ++ai;
    136133        else ++bi;
    137134    }
     
    141138CC * caseInsensitize(const CC * cc) {
    142139    CC * cci = makeCC();
    143     for (auto i = cc->cbegin(); i != cc->cend(); i++) {
    144         caseInsensitiveInsertRange(cci, i->lo_codepoint, i->hi_codepoint);
     140    for (const CharSetItem & i : *cc) {
     141        caseInsensitiveInsertRange(cci, i.lo_codepoint, i.hi_codepoint);
    145142    }
    146143    return cci;
     
    153150 * @param hi
    154151 ** ------------------------------------------------------------------------------------------------------------- */
    155 CC * rangeIntersect(const CC * cc, const CodePointType lo, const CodePointType hi) {
     152CC * rangeIntersect(const CC * cc, const codepoint_t lo, const codepoint_t hi) {
    156153    assert ("cc cannot be null" && cc);
    157154    CC * intersect = makeCC();
     
    170167 * @param hi
    171168 ** ------------------------------------------------------------------------------------------------------------- */
    172 CC * rangeGaps(const CC * cc, const CodePointType lo, const CodePointType hi) {
     169CC * rangeGaps(const CC * cc, const codepoint_t lo, const codepoint_t hi) {
    173170    assert ("cc cannot be null" && cc);
    174171    CC * gaps = makeCC();
    175     CodePointType cp = lo;
     172    codepoint_t cp = lo;
    176173    if (cp < hi) {
    177174        auto i = cc->cbegin(), end = cc->cend();
  • icGREP/icgrep-devel/icgrep/re/re_cc.h

    r4611 r4612  
    1616namespace re {
    1717
    18 typedef int CodePointType;
    19 
    20 struct CharSetItem{   
    21     CharSetItem() : lo_codepoint(0), hi_codepoint(0) {}
    22     CharSetItem(const CodePointType lo, const CodePointType hi) : lo_codepoint(lo), hi_codepoint(hi) {}
    23     CodePointType lo_codepoint;
    24     CodePointType hi_codepoint;
     18typedef unsigned codepoint_t;
     19
     20struct CharSetItem {
     21    constexpr CharSetItem() : lo_codepoint(0), hi_codepoint(0) {}
     22    constexpr CharSetItem(const codepoint_t lo, const codepoint_t hi) : lo_codepoint(lo), hi_codepoint(hi) {}
     23    constexpr codepoint_t operator [](const unsigned i) const {
     24        return (i == 0) ? lo_codepoint : (i == 1) ? hi_codepoint : throw std::runtime_error("CharSetItem[] can only accept 0 or 1.");
     25    }
     26    codepoint_t lo_codepoint;
     27    codepoint_t hi_codepoint;
    2528};
    2629
     
    4649    typedef CharSetVector::const_reference          const_reference;
    4750
    48     static const CodePointType UNICODE_MAX = 0x10FFFF;
     51    static const codepoint_t UNICODE_MAX = 0x10FFFF;
    4952
    5053    std::string canonicalName(const CC_type type) const;
    5154
    52     CodePointType max_codepoint();
    53 
    54     void insert_range(const CodePointType lo_codepoint, const CodePointType hi_codepoint);
    55 
    56     void remove_range(const CodePointType lo_codepoint, const CodePointType hi_codepoint);
    57 
    58     inline void insert(const CodePointType codepoint) {
     55    CharSetItem & operator [](unsigned i) {
     56        return mSparseCharSet[i];
     57    }
     58
     59    const CharSetItem & operator [](unsigned i) const {
     60        return mSparseCharSet[i];
     61    }
     62
     63    inline codepoint_t min_codepoint() const {
     64        return mSparseCharSet.size() == 0 ? 0 : mSparseCharSet.front().lo_codepoint;
     65    }
     66
     67    inline codepoint_t max_codepoint() const {
     68        return mSparseCharSet.size() == 0 ? 0 : mSparseCharSet.back().hi_codepoint;
     69    }
     70
     71    void insert_range(const codepoint_t lo_codepoint, const codepoint_t hi_codepoint);
     72
     73    void remove_range(const codepoint_t lo_codepoint, const codepoint_t hi_codepoint);
     74
     75    inline void insert(const codepoint_t codepoint) {
    5976        insert_range(codepoint, codepoint);
    6077    }
    6178
    62     inline void remove(const CodePointType codepoint) {
     79    inline void remove(const codepoint_t codepoint) {
    6380        remove_range(codepoint, codepoint);
    6481    }
     
    116133protected:
    117134    friend CC * makeCC();
    118     friend CC * makeCC(const CodePointType codepoint);
    119     friend CC * makeCC(const CodePointType lo, const CodePointType hi);
     135    friend CC * makeCC(const codepoint_t codepoint);
     136    friend CC * makeCC(const codepoint_t lo, const codepoint_t hi);
    120137    friend CC * makeCC(const CC * cc1, const CC * cc2);
    121138    friend CC * subtractCC(const CC * cc1, const CC * cc2);
     
    126143    }
    127144    CC(const CC & cc);
    128     inline CC(const CodePointType codepoint)
     145    inline CC(const codepoint_t codepoint)
    129146    : RE(ClassTypeId::CC)
    130147    , mSparseCharSet(mCharSetAllocator) {
    131148        insert(codepoint);
    132149    }
    133     inline CC(const CodePointType lo_codepoint, const CodePointType hi_codepoint)
     150    inline CC(const codepoint_t lo_codepoint, const codepoint_t hi_codepoint)
    134151    : RE(ClassTypeId::CC)
    135152    , mSparseCharSet(mCharSetAllocator) {
     
    157174    return cc.cend();
    158175}
     176
     177
    159178
    160179/**
     
    170189}
    171190
    172 inline CC * makeCC(const CodePointType codepoint) {
     191inline CC * makeCC(const codepoint_t codepoint) {
    173192    return new CC(codepoint);
    174193}
    175194
    176 inline CC * makeCC(const CodePointType lo, const CodePointType hi) {
     195inline CC * makeCC(const codepoint_t lo, const codepoint_t hi) {
    177196    return new CC(lo, hi);
    178197}
     
    188207CC * caseInsensitize(const CC * cc);
    189208
    190 CC * rangeIntersect(const CC * cc, const CodePointType lo, const CodePointType hi);
    191 
    192 CC * rangeGaps(const CC * cc, const CodePointType lo, const CodePointType hi);
     209CC * rangeIntersect(const CC * cc, const codepoint_t lo, const codepoint_t hi);
     210
     211CC * rangeGaps(const CC * cc, const codepoint_t lo, const codepoint_t hi);
    193212
    194213CC * outerRanges(const CC * cc);
  • icGREP/icgrep-devel/icgrep/re/re_parser.h

    r4429 r4612  
    2020enum CharsetOperatorKind
    2121        {intersectOp, setDiffOp, ampChar, hyphenChar, rangeHyphen, posixPropertyOpener, setOpener, setCloser, backSlash, emptyOperator};
    22 
    23 typedef unsigned codepoint_t;
    2422
    2523enum ModeFlagType
  • icGREP/icgrep-devel/icgrep/utf8_encoder.cpp

    r4337 r4612  
    5050}
    5151
    52 RE * UTF8_Encoder::rangeToUTF8(const CodePointType lo, const CodePointType hi, const unsigned index, const unsigned max)
     52RE * UTF8_Encoder::rangeToUTF8(const codepoint_t lo, const codepoint_t hi, const unsigned index, const unsigned max)
    5353{
    54     const CodePointType hbyte = u8byte(hi, index);
    55     const CodePointType lbyte = u8byte(lo, index);
     54    const codepoint_t hbyte = u8byte(hi, index);
     55    const codepoint_t lbyte = u8byte(lo, index);
    5656    if (index == max) {
    5757        return makeByteRange(lbyte, hbyte);
     
    8080}
    8181
    82 inline CodePointType UTF8_Encoder::u8byte(const CodePointType codepoint, const unsigned n)
     82inline codepoint_t UTF8_Encoder::u8byte(const codepoint_t codepoint, const unsigned n)
    8383{
    84     CodePointType retVal = 0;
     84    codepoint_t retVal = 0;
    8585
    8686    const unsigned len = lenUTF8(codepoint);
     
    132132}
    133133
    134 inline CC * UTF8_Encoder::makeByteRange(const CodePointType lo, const CodePointType hi) {
     134inline CC * UTF8_Encoder::makeByteRange(const codepoint_t lo, const codepoint_t hi) {
    135135    return makeCC(lo, hi);
    136136}
    137137
    138 inline CC * UTF8_Encoder::makeByteClass(const CodePointType cp) {
     138inline CC * UTF8_Encoder::makeByteClass(const codepoint_t cp) {
    139139    return makeCC(cp, cp);
    140140}
  • icGREP/icgrep-devel/icgrep/utf8_encoder.h

    r4249 r4612  
    2222private:
    2323    static re::RE * rangeToUTF8(const re::CharSetItem & item);
    24     static re::RE * rangeToUTF8(const re::CodePointType lo, const re::CodePointType hi, const unsigned index, const unsigned max);
    25     static re::CC * makeByteClass(const re::CodePointType cp);
    26     static re::CC * makeByteRange(const re::CodePointType lo, const re::CodePointType hi);
     24    static re::RE * rangeToUTF8(const re::codepoint_t lo, const re::codepoint_t hi, const unsigned index, const unsigned max);
     25    static re::CC * makeByteClass(const re::codepoint_t cp);
     26    static re::CC * makeByteRange(const re::codepoint_t lo, const re::codepoint_t hi);
    2727    static bool isUTF8Prefix(const unsigned cp);
    2828    static unsigned lenUTF8(const unsigned cp);
    2929    static unsigned maxCodePoint(const unsigned length);
    30     static re::CodePointType u8byte(const re::CodePointType codepoint, const unsigned n);
     30    static re::codepoint_t u8byte(const re::codepoint_t codepoint, const unsigned n);
    3131};
    3232
Note: See TracChangeset for help on using the changeset viewer.