Ignore:
Timestamp:
Jul 12, 2015, 3:52:43 PM (4 years ago)
Author:
nmedfort
Message:

Start of work to make the pablo compiler reentrant. Fixed a bug that prevented it from using the Less optimization level.

File:
1 edited

Legend:

Unmodified
Added
Removed
  • icGREP/icgrep-devel/icgrep/UCD/ucd_compiler.cpp

    r4638 r4661  
    2727 * @param ifRangeList
    2828 ** ------------------------------------------------------------------------------------------------------------- */
    29 PabloAST * UCDCompiler::generateWithIfHierarchy(const RangeList & ifRanges, const UnicodeSet & set, const codepoint_t lo, const codepoint_t hi, PabloBuilder & block) {
    30 
    31     PabloAST * target = block.createZeroes();
     29PabloAST * UCDCompiler::generateWithIfHierarchy(const RangeList & ifRanges, const UnicodeSet & set, const codepoint_t lo, const codepoint_t hi, PabloBuilder & builder) {
     30
     31    PabloAST * target = builder.createZeroes();
    3232    // Codepoints in unenclosed ranges will be computed unconditionally.
    3333    // Generate them first so that computed subexpressions may be shared
     
    3737
    3838    for (const auto rg : rangeGaps(enclosed, lo, hi)) {
    39         target = generateSubRanges(set, lo_codepoint(rg), hi_codepoint(rg), block, target);
     39        target = generateSubRanges(set, lo_codepoint(rg), hi_codepoint(rg), builder, target);
    4040    }
    4141
     
    4646        std::tie(lo, hi) = range;
    4747        if (set.intersects(lo, hi)) {
    48             PabloBuilder inner_block = PabloBuilder::Create(block);
     48            PabloBuilder inner_block = PabloBuilder::Create(builder);
    4949            PabloAST * inner_target = generateWithIfHierarchy(inner, set, lo, hi, inner_block);
    5050            // If this range is empty, just skip creating the if block
     
    5353            }
    5454            Assign * matches = inner_block.createAssign("m", inner_target);
    55             block.createIf(ifTestCompiler(lo, hi, block), {matches}, inner_block);
    56             target = block.createOr(target, matches);
     55            builder.createIf(ifTestCompiler(lo, hi, builder), {matches}, inner_block);
     56            target = builder.createOr(target, matches);
    5757        }
    5858    }
     
    6565 * @param ifRangeList
    6666 ** ------------------------------------------------------------------------------------------------------------- */
    67 PabloAST * UCDCompiler::generateSubRanges(const UnicodeSet & set, const codepoint_t lo, const codepoint_t hi, PabloBuilder & block, PabloAST * target) {
     67PabloAST * UCDCompiler::generateSubRanges(const UnicodeSet & set, const codepoint_t lo, const codepoint_t hi, PabloBuilder & builder, PabloAST * target) {
    6868    const auto range = rangeIntersect(set, lo, hi);
    6969    // Divide by UTF-8 length, separating out E0, ED, F0 and F4 ranges
     
    7373    for (auto r : ranges) {
    7474        const auto subrange = rangeIntersect(range, lo_codepoint(r), hi_codepoint(r));
    75         target = sequenceGenerator(std::move(subrange), 1, block, target, nullptr);
     75        target = sequenceGenerator(std::move(subrange), 1, builder, target, nullptr);
    7676    }
    7777    return target;
     
    8686 * matching the sequences up to byte number byte_no have been generated.
    8787 ** ------------------------------------------------------------------------------------------------------------- */
    88 PabloAST * UCDCompiler::sequenceGenerator(const RangeList && ranges, const unsigned byte_no, PabloBuilder & block, PabloAST * target, PabloAST * prefix) {
     88PabloAST * UCDCompiler::sequenceGenerator(const RangeList && ranges, const unsigned byte_no, PabloBuilder & builder, PabloAST * target, PabloAST * prefix) {
    8989
    9090    if (LLVM_LIKELY(!ranges.empty())) {
     
    9898        if (min != max) {
    9999            const auto mid = UTF8_Encoder::maxCodePoint(min);
    100             target = sequenceGenerator(std::move(rangeIntersect(ranges, lo, mid)), byte_no, block, target, prefix);
    101             target = sequenceGenerator(std::move(rangeIntersect(ranges, mid + 1, hi)), byte_no, block, target, prefix);
     100            target = sequenceGenerator(std::move(rangeIntersect(ranges, lo, mid)), byte_no, builder, target, prefix);
     101            target = sequenceGenerator(std::move(rangeIntersect(ranges, mid + 1, hi)), byte_no, builder, target, prefix);
    102102        }
    103103        else if (min == byte_no) {
     
    105105            // Use the byte class compiler to generate matches for these codepoints.
    106106            const auto bytes = byteDefinitions(ranges, byte_no);
    107             PabloAST * var = mCharacterClassCompiler.compileCC(makeCC(bytes), block);
     107            PabloAST * var = mCharacterClassCompiler.compileCC(makeCC(bytes), builder);
    108108            if (byte_no > 1) {
    109                 var = block.createAnd(var, block.createAdvance(makePrefix(lo, byte_no, block, prefix), 1));
    110             }
    111             target = block.createOr(target, var);
     109                var = builder.createAnd(var, builder.createAdvance(makePrefix(lo, byte_no, builder, prefix), 1));
     110            }
     111            target = builder.createOr(target, var);
    112112        }
    113113        else {
     
    120120                    if (!UTF8_Encoder::isLowCodePointAfterByte(lo, byte_no)) {
    121121                        const codepoint_t mid = lo | ((1 << (6 * (min - byte_no))) - 1);
    122                         target = sequenceGenerator(lo, mid, byte_no, block, target, prefix);
    123                         target = sequenceGenerator(mid + 1, hi, byte_no, block, target, prefix);
     122                        target = sequenceGenerator(lo, mid, byte_no, builder, target, prefix);
     123                        target = sequenceGenerator(mid + 1, hi, byte_no, builder, target, prefix);
    124124                    }
    125125                    else if (!UTF8_Encoder::isHighCodePointAfterByte(hi, byte_no)) {
    126126                        const codepoint_t mid = hi & ~((1 << (6 * (min - byte_no))) - 1);
    127                         target = sequenceGenerator(lo, mid - 1, byte_no, block, target, prefix);
    128                         target = sequenceGenerator(mid, hi, byte_no, block, target, prefix);
     127                        target = sequenceGenerator(lo, mid - 1, byte_no, builder, target, prefix);
     128                        target = sequenceGenerator(mid, hi, byte_no, builder, target, prefix);
    129129                    }
    130130                    else { // we have a prefix group of type (a)
    131                         PabloAST * var = mCharacterClassCompiler.compileCC(makeCC(lo_byte, hi_byte), block);
     131                        PabloAST * var = mCharacterClassCompiler.compileCC(makeCC(lo_byte, hi_byte), builder);
    132132                        if (byte_no > 1) {
    133                             var = block.createAnd(block.createAdvance(prefix, 1), var);
     133                            var = builder.createAnd(builder.createAdvance(prefix, 1), var);
    134134                        }
    135135                        for (unsigned i = byte_no; i != UTF8_Encoder::length(lo); ++i) {
    136                             var = block.createAnd(mSuffixVar, block.createAdvance(var, 1));
     136                            var = builder.createAnd(mSuffixVar, builder.createAdvance(var, 1));
    137137                        }
    138                         target = block.createOr(target, var);
     138                        target = builder.createOr(target, var);
    139139                    }
    140140                }
    141141                else { // lbyte == hbyte
    142                     PabloAST * var = mCharacterClassCompiler.compileCC(makeCC(lo_byte, hi_byte), block);
     142                    PabloAST * var = mCharacterClassCompiler.compileCC(makeCC(lo_byte, hi_byte), builder);
    143143                    if (byte_no > 1) {
    144                         var = block.createAnd(block.createAdvance(prefix ? prefix : var, 1), var);
     144                        var = builder.createAnd(builder.createAdvance(prefix ? prefix : var, 1), var);
    145145                    }
    146146                    if (byte_no < UTF8_Encoder::length(lo)) {
    147                         target = sequenceGenerator(lo, hi, byte_no + 1, block, target, var);
     147                        target = sequenceGenerator(lo, hi, byte_no + 1, builder, target, var);
    148148                    }
    149149                }
     
    157157 * @brief sequenceGenerator
    158158 ** ------------------------------------------------------------------------------------------------------------- */
    159 inline PabloAST * UCDCompiler::sequenceGenerator(const codepoint_t lo, const codepoint_t hi, const unsigned byte_no, PabloBuilder & block, PabloAST * target, PabloAST * prefix) {
    160     return sequenceGenerator({{ lo, hi }}, byte_no, block, target, prefix);
     159inline PabloAST * UCDCompiler::sequenceGenerator(const codepoint_t lo, const codepoint_t hi, const unsigned byte_no, PabloBuilder & builder, PabloAST * target, PabloAST * prefix) {
     160    return sequenceGenerator({{ lo, hi }}, byte_no, builder, target, prefix);
    161161}
    162162
     
    164164 * @brief ifTestCompiler
    165165 ** ------------------------------------------------------------------------------------------------------------- */
    166 inline PabloAST * UCDCompiler::ifTestCompiler(const codepoint_t lo, const codepoint_t hi, PabloBuilder & block) {
    167     return ifTestCompiler(lo, hi, 1, block, block.createOnes());
     166inline PabloAST * UCDCompiler::ifTestCompiler(const codepoint_t lo, const codepoint_t hi, PabloBuilder & builder) {
     167    return ifTestCompiler(lo, hi, 1, builder, builder.createOnes());
    168168}
    169169
     
    171171 * @brief ifTestCompiler
    172172 ** ------------------------------------------------------------------------------------------------------------- */
    173 PabloAST * UCDCompiler::ifTestCompiler(const codepoint_t lo, const codepoint_t hi, const unsigned byte_no, PabloBuilder & block, PabloAST * target) {
     173PabloAST * UCDCompiler::ifTestCompiler(const codepoint_t lo, const codepoint_t hi, const unsigned byte_no, PabloBuilder & builder, PabloAST * target) {
    174174
    175175    codepoint_t lo_byte = UTF8_Encoder::encodingByte(lo, byte_no);
     
    183183            if (hi == 0x10FFFF) hi_byte = 0xFF;
    184184        }
    185         PabloAST * cc = mCharacterClassCompiler.compileCC(makeCC(lo_byte, hi_byte), block);
    186         target = block.createAnd(cc, target);
     185        PabloAST * cc = mCharacterClassCompiler.compileCC(makeCC(lo_byte, hi_byte), builder);
     186        target = builder.createAnd(cc, target);
    187187    }
    188188    else if (lo_byte == hi_byte) {
    189         PabloAST * cc = mCharacterClassCompiler.compileCC(makeCC(lo_byte, hi_byte), block);
    190         target = block.createAnd(cc, target);
    191         target = block.createAdvance(target, 1);
    192         target = ifTestCompiler(lo, hi, byte_no + 1, block, target);
     189        PabloAST * cc = mCharacterClassCompiler.compileCC(makeCC(lo_byte, hi_byte), builder);
     190        target = builder.createAnd(cc, target);
     191        target = builder.createAdvance(target, 1);
     192        target = ifTestCompiler(lo, hi, byte_no + 1, builder, target);
    193193    }
    194194    else if (!at_hi_boundary) {
    195195        const auto mid = UTF8_Encoder::minCodePointWithCommonBytes(hi, byte_no);
    196         PabloAST * e1 = ifTestCompiler(lo, mid - 1, byte_no, block, target);
    197         PabloAST * e2 = ifTestCompiler(mid, hi, byte_no, block, target);
    198         target = block.createOr(e1, e2);
     196        PabloAST * e1 = ifTestCompiler(lo, mid - 1, byte_no, builder, target);
     197        PabloAST * e2 = ifTestCompiler(mid, hi, byte_no, builder, target);
     198        target = builder.createOr(e1, e2);
    199199    }
    200200    else {
    201201        const auto mid = UTF8_Encoder::maxCodePointWithCommonBytes(lo, byte_no);
    202         PabloAST * e1 = ifTestCompiler(lo, mid, byte_no, block, target);
    203         PabloAST * e2 = ifTestCompiler(mid + 1, hi, byte_no, block, target);
    204         target = block.createOr(e1, e2);
     202        PabloAST * e1 = ifTestCompiler(lo, mid, byte_no, builder, target);
     203        PabloAST * e2 = ifTestCompiler(mid + 1, hi, byte_no, builder, target);
     204        target = builder.createOr(e1, e2);
    205205    }
    206206    return target;
     
    214214 * Ensure the sequence of preceding bytes is defined, up to, but not including the given byte_no
    215215 ** ------------------------------------------------------------------------------------------------------------- */
    216 PabloAST * UCDCompiler::makePrefix(const codepoint_t cp, const unsigned byte_no, PabloBuilder & pb, PabloAST * prefix) {
     216PabloAST * UCDCompiler::makePrefix(const codepoint_t cp, const unsigned byte_no, PabloBuilder & builder, PabloAST * prefix) {
    217217    assert (byte_no >= 1 && byte_no <= 4);
    218218    assert (byte_no == 1 || prefix != nullptr);
    219219    for (unsigned i = 1; i != byte_no; ++i) {
    220220        const CC * const cc = makeCC(UTF8_Encoder::encodingByte(cp, i));
    221         PabloAST * var = mCharacterClassCompiler.compileCC(cc, pb);
     221        PabloAST * var = mCharacterClassCompiler.compileCC(cc, builder);
    222222        if (i > 1) {
    223             var = pb.createAnd(var, pb.createAdvance(prefix, 1));
     223            var = builder.createAnd(var, builder.createAdvance(prefix, 1));
    224224        }
    225225        prefix = var;
     
    427427        {0x10000, 0x10FFFF}};
    428428
    429 //    llvm::raw_os_ostream out(std::cerr);
    430 
    431 //    set.dump(out);
    432 
    433 //    for (auto range : set) {
    434 //        out << range.first << ',' << range.second << "\n";
    435 //    }
    436 
    437 //    out.flush();
    438 
    439429    return generateWithIfHierarchy(defaultIfHierachy, set, entry);
    440430}
Note: See TracChangeset for help on using the changeset viewer.