Changeset 4624


Ignore:
Timestamp: Jun 30, 2015, 12:09:29 AM (4 years ago)
Author: nmedfort
Message: Some bug fixes for UCD Compiler

Location: icGREP/icgrep-devel/icgrep/UCD
Files: 2 edited

Legend: Unmodified, Added, Removed
  • icGREP/icgrep-devel/icgrep/UCD/ucd_compiler.cpp

    r4623 r4624  
    4646            continue;
    4747        }
    48         PabloBuilder inner_block(block);
     48        PabloBuilder inner_block = PabloBuilder::Create(block);
    4949        PabloAST * inner_target = generateWithIfHierarchy(inner, set, lo, hi, inner_block);
    5050        // If this range is empty, just skip creating the if block
     
    5353        }
    5454        Assign * matches = inner_block.createAssign("m", inner_target);
    55         block.createIf(ifTestCompiler(lo, hi, block), {matches}, inner_block.getPabloBlock());
     55        block.createIf(ifTestCompiler(lo, hi, block), {matches}, inner_block);
    5656        target = block.createOr(target, matches);
    5757    }
     
    101101        }
    102102        else if (min == byte_no) {
    103             // We have a single byte remaining to marangetch for all codepoints in this cc.
     103            // We have a single byte remaining to match for all code points in this CC.
    104104            // Use the byte class compiler to generate matches for these codepoints.
    105105            const auto bytes = byteDefinitions(ranges, byte_no);
    106106            PabloAST * testVar = mCharacterClassCompiler.compileCC(makeCC(bytes), block);
    107107            if (byte_no > 1) {
    108                 testVar = block.createAnd(testVar, block.createAdvance(makePrefix(lo, byte_no, block), 1));
     108                testVar = block.createAnd(testVar, block.createAdvance(makePrefix(lo, byte_no, block, prefix), 1));
    109109            }
    110110            target = block.createOr(target, testVar);
     
    215215 * Ensure the sequence of preceding bytes is defined, up to, but not including the given byte_no
    216216 ** ------------------------------------------------------------------------------------------------------------- */
    217 PabloAST * UCDCompiler::makePrefix(const codepoint_t cp, const unsigned byte_no, pablo::PabloBuilder & pb) {
    218     std::array<PabloAST *, 4> prefixes;
     217PabloAST * UCDCompiler::makePrefix(const codepoint_t cp, const unsigned byte_no, PabloBuilder & pb, PabloAST * prefix) {
    219218    assert (byte_no >= 1 && byte_no <= 4);
     219    assert (byte_no == 1 || prefix != nullptr);
    220220    for (unsigned i = 1; i != byte_no; ++i) {
    221221        const CC * const cc = makeCC(UTF8_Encoder::encodingByte(cp, i));
    222         prefixes[i - 1] = mCharacterClassCompiler.compileCC(cc, pb);
     222        PabloAST * var = mCharacterClassCompiler.compileCC(cc, pb);
    223223        if (i > 1) {
    224             prefixes[i - 1] = pb.createAnd(prefixes[i - 1], pb.createAdvance(prefixes[i - 2], 1));
    225         }
    226     }
    227     return prefixes[byte_no - 1];
     224            var = pb.createAnd(var, pb.createAdvance(prefix, 1));
     225        }
     226        prefix = var;
     227    }
     228    return prefix;
    228229}
    229230
     
    293294
    294295/** ------------------------------------------------------------------------------------------------------------- *
    295  * @brief rangeOuter
     296 * @brief outerRanges
    296297 * @param list
    297298 ** ------------------------------------------------------------------------------------------------------------- */
    298299UCDCompiler::RangeList UCDCompiler::outerRanges(const RangeList & list) {
    299300    RangeList ranges;
    300     for (auto i = list.cbegin(), j = i; ++j != list.cend(); ) {
    301         if (hi_codepoint(*j) > hi_codepoint(*i)) {
     301    if (LLVM_LIKELY(list.size() > 0)) {
     302        auto i = list.cbegin();
     303        for (auto j = i + 1; j != list.cend(); ++j) {
     304            if (hi_codepoint(*j) > hi_codepoint(*i)) {
     305                ranges.emplace_back(lo_codepoint(*i), hi_codepoint(*i));
     306                i = j;
     307            }
     308        }
     309        if (LLVM_LIKELY(i != list.end())) {
    302310            ranges.emplace_back(lo_codepoint(*i), hi_codepoint(*i));
    303             i = j;
    304311        }
    305312    }
     
    308315
    309316/** ------------------------------------------------------------------------------------------------------------- *
    310  * @brief rangeInner
     317 * @brief innerRanges
    311318 * @param list
    312319 ** ------------------------------------------------------------------------------------------------------------- */
    313320UCDCompiler::RangeList UCDCompiler::innerRanges(const RangeList & list) {
    314321    RangeList ranges;
    315     for (auto i = list.cbegin(), j = i; ++j != list.cend(); ) {
    316         if (hi_codepoint(*j) <= hi_codepoint(*i)) {
    317             ranges.emplace_back(lo_codepoint(*j), hi_codepoint(*j));
    318         }
    319         else {
    320             i = j;
     322    if (LLVM_LIKELY(list.size() > 0)) {
     323        for (auto i = list.cbegin(), j = i + 1; j != list.cend(); ++j) {
     324            if (hi_codepoint(*j) <= hi_codepoint(*i)) {
     325                ranges.emplace_back(lo_codepoint(*j), hi_codepoint(*j));
     326            }
     327            else {
     328                i = j;
     329            }
    321330        }
    322331    }
  • icGREP/icgrep-devel/icgrep/UCD/ucd_compiler.hpp

    r4623 r4624  
    4747    PabloAST * ifTestCompiler(const codepoint_t lo, const codepoint_t hi, const unsigned byte_no, PabloBuilder & block, PabloAST * target);
    4848
    49     PabloAST * makePrefix(const codepoint_t cp, const unsigned byte_no, PabloBuilder & pb);
     49    PabloAST * makePrefix(const codepoint_t cp, const unsigned byte_no, PabloBuilder & pb, PabloAST * prefix);
    5050
    5151    static RangeList byteDefinitions(const RangeList & list, const unsigned byte_no);
Note: See TracChangeset for help on using the changeset viewer.