Ignore:
Timestamp:
Oct 26, 2017, 5:28:05 PM (17 months ago)
Author:
cameron
Message:

Enabling Unicode log2 bounded repetition uwing indexed advance (for n<64 and AVX2 only)

File:
1 edited

Legend:

Unmodified
Added
Removed
  • icGREP/icgrep-devel/icgrep/re/re_compiler.cpp

    r5706 r5710  
    3030#include "cc/cc_compiler.h"         // for CC_Compiler
    3131#include "pablo/builder.hpp"        // for PabloBuilder
     32#include "toolchain/toolchain.h"    // for AVX2_available
    3233#include <llvm/Support/ErrorHandling.h>
    3334
     
    342343*/
    343344
    344 inline PabloAST * RE_Compiler::consecutive_matches(PabloAST * repeated, int length, int repeat_count, PabloBuilder & pb) {
     345inline PabloAST * RE_Compiler::consecutive_matches(PabloAST * repeated, int length, int repeat_count, PabloAST * indexStream, PabloBuilder & pb) {
    345346    int i = length;
    346347    int total = repeat_count * length;
     
    348349    while ((i * 2) < total) {
    349350        PabloAST * v = consecutive_i;
    350         PabloAST * v2 =  pb.createAdvance(v, i);
     351        PabloAST * v2 = nullptr;
     352        if (indexStream == nullptr) v2 =  pb.createAdvance(v, i);
     353        else v2 =  pb.createIndexedAdvance(v, indexStream, i);
    351354        i *= 2;
    352355        consecutive_i = pb.createAnd(v, v2, "at" + std::to_string(i) + "of" + std::to_string(total));
     
    354357    if (LLVM_LIKELY(i < total)) {
    355358        PabloAST * v = consecutive_i;
    356         PabloAST * v2 = pb.createAdvance(v, total - i);
     359        PabloAST * v2 =  nullptr;
     360        if (indexStream == nullptr) v2 = pb.createAdvance(v, total - i);
     361        else v2 = pb.createIndexedAdvance(v, indexStream, total - i);
    357362        consecutive_i = pb.createAnd(v, v2, "at" + std::to_string(total));
    358363    }
     
    360365}
    361366
    362 inline PabloAST * RE_Compiler::reachable(PabloAST * repeated, int length, int repeat_count, PabloBuilder & pb) {
     367inline PabloAST * RE_Compiler::reachable(PabloAST * repeated, int length, int repeat_count, PabloAST * indexStream, PabloBuilder & pb) {
    363368    int i = length;
    364369    int total_lgth = repeat_count * length;
     
    369374    while ((i * 2) < total_lgth) {
    370375        PabloAST * v = reachable;
    371         PabloAST * v2 =  pb.createAdvance(v, i);
     376        PabloAST * v2 = nullptr;
     377        if (indexStream == nullptr) v2 = pb.createAdvance(v, i);
     378        else v2 = pb.createIndexedAdvance(v, indexStream, i);
    372379        i *= 2;
    373380        reachable = pb.createOr(v, v2, "within" + std::to_string(i));
     
    375382    if (LLVM_LIKELY(i < total_lgth)) {
    376383        PabloAST * v = reachable;
    377         PabloAST * v2 = pb.createAdvance(v, total_lgth - i);
     384        PabloAST * v2 = nullptr;
     385        if (indexStream == nullptr) v2 = pb.createAdvance(v, total_lgth - i);
     386        else v2 = pb.createIndexedAdvance(v, indexStream, total_lgth - i);
    378387        reachable = pb.createOr(v, v2, "within" + std::to_string(total_lgth));
    379388    }
     
    386395    } else if (!mGraphemeBoundaryRule && isByteLength(repeated) && !AlgorithmOptionIsSet(DisableLog2BoundedRepetition)) {
    387396        PabloAST * cc = markerVar(compile(repeated, pb));
    388         PabloAST * cc_lb = consecutive_matches(cc, 1, lb, pb);
     397        PabloAST * cc_lb = consecutive_matches(cc, 1, lb, nullptr, pb);
    389398        const auto pos = markerPos(marker) == MarkerPosition::FinalMatchUnit ? lb : lb - 1;
    390399        PabloAST * marker_fwd = pb.createAdvance(markerVar(marker), pos);
     400        return makeMarker(MarkerPosition::FinalMatchUnit, pb.createAnd(marker_fwd, cc_lb, "lowerbound"));
     401    } else if (!mGraphemeBoundaryRule && isUnicodeUnitLength(repeated) && !AlgorithmOptionIsSet(DisableLog2BoundedRepetition) && (lb < sizeof(size_t) * 8) && AVX2_available()) {
     402        PabloAST * cc = markerVar(compile(repeated, pb));
     403        PabloAST * cc_lb = consecutive_matches(cc, 1, lb, mFinal, pb);
     404        const auto pos = markerPos(marker) == MarkerPosition::FinalMatchUnit ? lb : lb - 1;
     405        PabloAST * marker_fwd = pb.createIndexedAdvance(markerVar(marker), mFinal, pos);
    391406        return makeMarker(MarkerPosition::FinalMatchUnit, pb.createAnd(marker_fwd, cc_lb, "lowerbound"));
    392407    }
     
    423438        // If we've advanced the cursor to the post position unit, cursor begins on the first masked bit of the bounded mask.
    424439        // Extend the mask by ub - 1 byte positions to ensure the mask ends on the FinalMatchUnit of the repeated region.
    425         PabloAST * upperLimitMask = reachable(cursor, 1, ub - 1, pb);
     440        PabloAST * upperLimitMask = reachable(cursor, 1, ub - 1, nullptr, pb);
    426441        PabloAST * masked = pb.createAnd(markerVar(compile(repeated, pb)), upperLimitMask);
    427442        PabloAST * nonFinal = mNonFinal;
    428         if (mGraphemeBoundaryRule) {
    429             nonFinal = pb.createOr(nonFinal, pb.createNot(mGraphemeBoundaryRule, "gext"));
    430         }
     443        // MatchStar deposits any cursors on the post position. However those cursors may may land on the initial "byte" of a
     444        // "multi-byte" character. Combine the masked range with any nonFinals.
     445        PabloAST * bounded = pb.createMatchStar(cursor, pb.createOr(masked, nonFinal), "bounded");
     446        return makeMarker(MarkerPosition::FinalPostPositionUnit, bounded);
     447    } else if (!mGraphemeBoundaryRule && isUnicodeUnitLength(repeated) && ub > 1 && !AlgorithmOptionIsSet(DisableLog2BoundedRepetition)&& (ub < sizeof(size_t) * 8) && AVX2_available()) {
     448        // log2 upper bound for fixed length (=1) class
     449        // Create a mask of positions reachable within ub from current marker.
     450        // Use matchstar, then apply filter.
     451        PabloAST * cursor = markerVar(AdvanceMarker(marker, MarkerPosition::FinalPostPositionUnit, pb));
     452        // If we've advanced the cursor to the post position unit, cursor begins on the first masked bit of the bounded mask.
     453        // Extend the mask by ub - 1 byte positions to ensure the mask ends on the FinalMatchUnit of the repeated region.
     454        PabloAST * upperLimitMask = reachable(cursor, 1, ub - 1, mFinal, pb);
     455        PabloAST * masked = pb.createAnd(markerVar(compile(repeated, pb)), upperLimitMask);
     456        PabloAST * nonFinal = mNonFinal;
    431457        // MatchStar deposits any cursors on the post position. However those cursors may may land on the initial "byte" of a
    432458        // "multi-byte" character. Combine the masked range with any nonFinals.
Note: See TracChangeset for help on using the changeset viewer.