Ignore:
Timestamp:
Dec 15, 2017, 12:44:01 PM (18 months ago)
Author:
nmedfort
Message:

Initial check-in of LookAhead support; modified LineBreakKernel to compute CR+LF using LookAhead(1) + misc. fixes.

Location:
icGREP/icgrep-devel/icgrep/re
Files:
6 edited

Legend:

Unmodified
Added
Removed
  • icGREP/icgrep-devel/icgrep/re/casing.cpp

    r5781 r5782  
    2222
    2323namespace re {
    24 RE * resolveCaseInsensitiveMode(RE * re, bool inCaseInsensitiveMode) {
     24RE * resolveCaseInsensitiveMode(RE * re, const bool inCaseInsensitiveMode) {
    2525    if (isa<CC>(re)) {
    2626        if (inCaseInsensitiveMode) {
    27             UCD::UnicodeSet cased = caseInsensitize(*cast<CC>(re));
    28             return makeCC(std::move(cased));
     27            return makeCC(std::move(caseInsensitize(*cast<CC>(re))));
    2928        }
    30         else return re;
    31     }
    32     else if (Name * name = dyn_cast<Name>(re)) {
     29        return re;
     30    } else if (Name * name = dyn_cast<Name>(re)) {
    3331        if (!inCaseInsensitiveMode || (name->getDefinition() == nullptr)) return re;
    3432        RE * r = resolveCaseInsensitiveMode(name->getDefinition(), true);
    35         Name * n = makeName(name->getNamespace(), name->getName() + "/i", name->getType());
     33        Name * n = nullptr;
     34        if (name->hasNamespace()) {
     35            n = makeName(name->getNamespace(), name->getName() + "/i", name->getType());
     36        } else {
     37            n = makeName(name->getName() + "/i", name->getType());
     38        }
    3639        n->setDefinition(r);
    3740        return n;
    38     }
    39     else if (Seq * seq = dyn_cast<Seq>(re)) {
     41    } else if (Seq * seq = dyn_cast<Seq>(re)) {
    4042        std::vector<RE*> list;
    4143        for (auto i = seq->begin(); i != seq->end(); ++i) {
  • icGREP/icgrep-devel/icgrep/re/casing.h

    r5766 r5782  
    66class RE;
    77
    8 RE * resolveCaseInsensitiveMode(RE * re, bool inCaseInsensitiveMode);
     8RE * resolveCaseInsensitiveMode(RE * re, const bool inCaseInsensitiveMode);
    99
    1010}
  • icGREP/icgrep-devel/icgrep/re/re_alt.h

    r5775 r5782  
    5252RE * makeAlt(iterator begin, iterator end) {
    5353    Alt * newAlt = makeAlt();
    54     CC * unionCC = makeCC();
     54    CC * unionCC = nullptr;
    5555    for (auto i = begin; i != end; ++i) {
    56         if (const CC * cc = llvm::dyn_cast<CC>(*i)) {
    57             unionCC = makeCC(unionCC, cc);
     56        if (CC * cc = llvm::dyn_cast<CC>(*i)) {
     57            unionCC = unionCC ? makeCC(unionCC, cc) : cc;
    5858        } else if (const Alt * alt = llvm::dyn_cast<Alt>(*i)) {
    5959            // We have an Alt to embed within the alt.  We extract the individual
     
    6262            for (RE * a : *alt) {
    6363                if (CC * cc = llvm::dyn_cast<CC>(a)) {
    64                     unionCC = makeCC(unionCC, cc);
     64                    unionCC = unionCC ? makeCC(unionCC, cc) : cc;
     65                } else {
     66                    newAlt->push_back(a);
    6567                }
    66                 else newAlt->push_back(a);
    6768            }
    6869        }
     
    7172        }
    7273    }
    73     if (!unionCC->empty()) newAlt->push_back(unionCC);
     74    if (unionCC) {
     75        newAlt->push_back(unionCC);
     76    }
    7477    return newAlt;
    7578}
  • icGREP/icgrep-devel/icgrep/re/re_compiler.cpp

    r5780 r5782  
    3737namespace re { class Alt; }
    3838namespace re { class RE; }
    39 
    40 
    41 #define UNICODE_LINE_BREAK (!AlgorithmOptionIsSet(DisableUnicodeLineBreak))
    4239
    4340using namespace pablo;
     
    9996
    10097inline MarkerType RE_Compiler::compileAny(const MarkerType m, PabloBuilder & pb) {
    101     PabloAST * nextFinalByte = markerVar(AdvanceMarker(m, MarkerPosition::FinalPostPositionUnit, pb));
     98    PabloAST * const nextFinalByte = markerVar(AdvanceMarker(m, MarkerPosition::FinalPostPositionUnit, pb));
    10299    return makeMarker(MarkerPosition::FinalMatchUnit, nextFinalByte);
    103100}
    104101
    105102MarkerType RE_Compiler::compileCC(CC * cc, MarkerType marker, PabloBuilder & pb) {
    106     MarkerType nextPos;
    107     if (markerPos(marker) == MarkerPosition::FinalPostPositionUnit) {
    108         nextPos = marker;
    109     } else {
    110         nextPos = AdvanceMarker(marker, MarkerPosition::FinalPostPositionUnit, pb);
    111     }
    112     return makeMarker(MarkerPosition::FinalMatchUnit, pb.createAnd(markerVar(nextPos), mCCCompiler.compileCC(cc, pb)));
     103    PabloAST * const nextPos = markerVar(AdvanceMarker(marker, MarkerPosition::FinalPostPositionUnit, pb));
     104    return makeMarker(MarkerPosition::FinalMatchUnit, pb.createAnd(nextPos, mCCCompiler.compileCC(cc, pb)));
    113105}
    114106
     
    123115    } else if (isUnicodeUnitLength(name)) {
    124116        MarkerType nameMarker = compileName(name, pb);
    125         MarkerType nextPos;
    126         if (markerPos(marker) == MarkerPosition::FinalPostPositionUnit) {
    127             nextPos = marker;
    128         } else {
    129             nextPos = AdvanceMarker(marker, MarkerPosition::FinalPostPositionUnit, pb);
    130         }
     117        MarkerType nextPos = AdvanceMarker(marker, MarkerPosition::FinalPostPositionUnit, pb);
    131118        nameMarker.stream = pb.createAnd(markerVar(nextPos), markerVar(nameMarker), name->getName());
    132119        return nameMarker;
     
    560547}
    561548
    562 inline MarkerType RE_Compiler::compileStart(const MarkerType marker, pablo::PabloBuilder & pb) {
     549inline MarkerType RE_Compiler::compileStart(MarkerType marker, pablo::PabloBuilder & pb) {
     550    PabloAST * sol = pb.createNot(pb.createAdvance(pb.createNot(mLineBreak), 1));
     551    if (!AlgorithmOptionIsSet(DisableUnicodeLineBreak)) {
     552        sol = pb.createScanThru(pb.createAnd(mInitial, sol), mNonFinal);
     553    }
    563554    MarkerType m = AdvanceMarker(marker, MarkerPosition::FinalPostPositionUnit, pb);
    564     if (UNICODE_LINE_BREAK) {
    565         PabloAST * line_end = mPB.createOr(mLineBreak, mCRLF);
    566         PabloAST * sol_init = pb.createNot(pb.createOr(pb.createAdvance(pb.createNot(line_end), 1), mCRLF));
    567         PabloAST * sol = pb.createScanThru(pb.createAnd(mInitial, sol_init), mNonFinal);
    568         return makeMarker(MarkerPosition::FinalPostPositionUnit, pb.createAnd(markerVar(m), sol, "sol"));
    569     } else {
    570         PabloAST * sol = pb.createNot(pb.createAdvance(pb.createNot(mLineBreak), 1));
    571         return makeMarker(MarkerPosition::FinalPostPositionUnit, pb.createAnd(markerVar(m), sol, "sol"));
    572     }
    573 }
    574 
    575 inline MarkerType RE_Compiler::compileEnd(const MarkerType marker, pablo::PabloBuilder & pb) {
    576     PabloAST * nextPos = markerVar(AdvanceMarker(marker, MarkerPosition::FinalPostPositionUnit, pb));
    577     return makeMarker(MarkerPosition::FinalPostPositionUnit, pb.createAnd(nextPos, mLineBreak, "eol"));
     555    return makeMarker(MarkerPosition::FinalPostPositionUnit, pb.createAnd(markerVar(m), sol, "sol"));
     556}
     557
     558inline MarkerType RE_Compiler::compileEnd(MarkerType marker, pablo::PabloBuilder & pb) {
     559    PabloAST * const nextPos = markerVar(AdvanceMarker(marker, MarkerPosition::FinalPostPositionUnit, pb));
     560    return makeMarker(MarkerPosition::FinalPostPositionUnit, pb.createAnd(pb.createScanThru(nextPos, mCRLF), mLineBreak, "eol"));
    578561}
    579562
     
    605588    llvm::report_fatal_error(errmsg);
    606589}
    607    
    608    
    609590
    610591RE_Compiler::RE_Compiler(PabloKernel * kernel, cc::CC_Compiler & ccCompiler)
     
    623604    Var * const linebreak = mKernel->getInputStreamVar("linebreak");
    624605    mLineBreak = mPB.createExtract(linebreak, mPB.getInteger(0));
     606    Var * const crlf = mKernel->getInputStreamVar("cr+lf");
     607    mCRLF = mPB.createExtract(crlf, mPB.getInteger(0));
    625608    Var * const required = mKernel->getInputStreamVar("required");
    626609    mInitial = mPB.createExtract(required, mPB.getInteger(0));
    627610    mNonFinal = mPB.createExtract(required, mPB.getInteger(1));
    628611    mFinal = mPB.createExtract(required, mPB.getInteger(2));
    629     mCRLF = mPB.createExtract(required, mPB.getInteger(3));
     612
    630613}
    631614
  • icGREP/icgrep-devel/icgrep/re/re_compiler.h

    r5780 r5782  
    111111    MarkerType compileName(Name * name, pablo::PabloBuilder & pb);
    112112    MarkerType compileAny(const MarkerType m, pablo::PabloBuilder & pb);
    113     MarkerType compileStart(const MarkerType marker, pablo::PabloBuilder & pb);
    114     MarkerType compileEnd(const MarkerType marker, pablo::PabloBuilder & pb);
     113    MarkerType compileStart(MarkerType marker, pablo::PabloBuilder & pb);
     114    MarkerType compileEnd(MarkerType marker, pablo::PabloBuilder & pb);
    115115
    116116    MarkerType AdvanceMarker(MarkerType marker, const MarkerPosition newpos, pablo::PabloBuilder & pb);
  • icGREP/icgrep-devel/icgrep/re/to_utf8.cpp

    r5760 r5782  
    1919#include <re/re_assertion.h>
    2020#include <llvm/Support/Casting.h>
     21#include <llvm/Support/ErrorHandling.h>
    2122
    2223using namespace llvm;
     
    9495        return makeIntersect(toUTF8(e->getLH()), toUTF8(e->getRH()));
    9596    }
    96 }
     97    llvm_unreachable("unexpected RE type given to toUTF8");
     98    return nullptr;
    9799}
    98100
     101}
     102
Note: See TracChangeset for help on using the changeset viewer.