Changeset 5867


Ignore:
Timestamp:
Feb 8, 2018, 9:32:11 PM (10 months ago)
Author:
cameron
Message:

Fold UnicodeLineBreak into Required_Streams_UTF8; CRLF into nonFinal

Location:
icGREP/icgrep-devel/icgrep
Files:
5 edited

Legend:

Unmodified
Added
Removed
  • icGREP/icgrep-devel/icgrep/grep_engine.cpp

    r5861 r5867  
    137137
    138138    StreamSetBuffer * LineBreakStream = mGrepDriver->addBuffer<CircularBuffer>(idb, idb->getStreamSetTy(1, 1), baseBufferSize);
    139     StreamSetBuffer * CRLFStream = mGrepDriver->addBuffer<CircularBuffer>(idb, idb->getStreamSetTy(1, 1), baseBufferSize);
    140     kernel::Kernel * linebreakK = mGrepDriver->addKernelInstance<kernel::LineBreakKernelBuilder>(idb, encodingBits);
    141     mGrepDriver->makeKernelCall(linebreakK, {BasisBits, LineFeedStream}, {LineBreakStream, CRLFStream});
    142139
    143140    kernel::Kernel * requiredStreamsK = mGrepDriver->addKernelInstance<kernel::RequiredStreams_UTF8>(idb);
    144     StreamSetBuffer * RequiredStreams = mGrepDriver->addBuffer<CircularBuffer>(idb, idb->getStreamSetTy(3, 1), baseBufferSize);
    145     mGrepDriver->makeKernelCall(requiredStreamsK, {BasisBits}, {RequiredStreams});
     141    StreamSetBuffer * RequiredStreams = mGrepDriver->addBuffer<CircularBuffer>(idb, idb->getStreamSetTy(1, 1), baseBufferSize);
     142    mGrepDriver->makeKernelCall(requiredStreamsK, {BasisBits, LineFeedStream}, {RequiredStreams, LineBreakStream});
    146143
    147144    const auto n = REs.size();
     
    156153            if (UnicodeSets.size() <= 1) {
    157154                kernel::Kernel * icgrepK = mGrepDriver->addKernelInstance<kernel::ICGrepKernel>(idb, REs[i]);
    158                 mGrepDriver->makeKernelCall(icgrepK, {BasisBits, LineBreakStream, CRLFStream, RequiredStreams}, {MatchResults});
     155                mGrepDriver->makeKernelCall(icgrepK, {BasisBits, LineBreakStream, RequiredStreams}, {MatchResults});
    159156                MatchResultsBufs[i] = MatchResults;
    160157            } else {
     
    169166//                mGrepDriver->makeKernelCall(ccK, {ByteStream}, {CharClasses});
    170167                kernel::Kernel * icgrepK = mGrepDriver->addKernelInstance<kernel::ICGrepKernel>(idb, REs[i], std::vector<cc::Alphabet *>{mpx.get()});
    171                 mGrepDriver->makeKernelCall(icgrepK, {BasisBits, LineBreakStream, CRLFStream, RequiredStreams, CharClasses}, {MatchResults});
     168                mGrepDriver->makeKernelCall(icgrepK, {BasisBits, LineBreakStream, RequiredStreams, CharClasses}, {MatchResults});
    172169                MatchResultsBufs[i] = MatchResults;
    173170            }
     
    175172            StreamSetBuffer * MatchResults = mGrepDriver->addBuffer<CircularBuffer>(idb, idb->getStreamSetTy(1, 1), baseBufferSize);
    176173            kernel::Kernel * icgrepK = mGrepDriver->addKernelInstance<kernel::ICGrepKernel>(idb, REs[i]);
    177             mGrepDriver->makeKernelCall(icgrepK, {BasisBits, LineBreakStream, CRLFStream, RequiredStreams}, {MatchResults});
     174            mGrepDriver->makeKernelCall(icgrepK, {BasisBits, LineBreakStream, RequiredStreams}, {MatchResults});
    178175            MatchResultsBufs[i] = MatchResults;
    179176        }
  • icGREP/icgrep-devel/icgrep/kernels/grep_kernel.cpp

    r5863 r5867  
    4343    PabloBuilder pb(getEntryScope());
    4444    cc::Parabix_CC_Compiler ccc(this, getInputStreamSet("basis"));
     45   
     46    PabloAST * const LF = pb.createExtract(getInput(1), pb.getInteger(0), "LF");
     47    PabloAST * const CR = ccc.compileCC(makeByte(0x0D));
     48    PabloAST * const LF_VT_FF_CR = ccc.compileCC("LF,VT,FF,CR", makeByte(0x0A, 0x0D), pb);
     49    Var * const LineBreak = pb.createVar("LineBreak", LF_VT_FF_CR);
     50   
     51    // Remove the CR of any CR+LF
     52    Var * const CRLF = pb.createVar("CRLF", pb.createZeroes());
     53    auto crb = pb.createScope();
     54    pb.createIf(CR, crb);
     55    PabloAST * const lookaheadLF = crb.createLookahead(LF, 1, "lookaheadLF");
     56    PabloAST * const crlf = crb.createAnd(CR, lookaheadLF);
     57    crb.createAssign(CRLF, crlf);
     58    PabloAST * removedCRLF = crb.createAnd(LineBreak, crb.createNot(CRLF));
     59    crb.createAssign(LineBreak, removedCRLF);
     60
     61   
    4562    Zeroes * const ZEROES = pb.createZeroes();
    4663    PabloAST * const u8pfx = ccc.compileCC(makeByte(0xC0, 0xFF));
     
    6481    it.createIf(u8pfx2, it2);
    6582    it2.createAssign(anyscope, it2.createAdvance(u8pfx2, 1));
     83    PabloAST * NEL = it2.createAnd(it2.createAdvance(ccc.compileCC(makeByte(0xC2), it2), 1), ccc.compileCC(makeByte(0x85), it2), "NEL");
     84    it2.createAssign(LineBreak, it2.createOr(LineBreak, NEL));
     85
    6686
    6787    //
     
    7999    PabloAST * const EX_invalid = it3.createOr(E0_invalid, ED_invalid);
    80100    it3.createAssign(EF_invalid, EX_invalid);
    81 
     101    PabloAST * E2_80 = it3.createAnd(it3.createAdvance(ccc.compileCC(makeByte(0xE2), it3), 1), ccc.compileCC(makeByte(0x80), it3));
     102    PabloAST * LS_PS = it3.createAnd(it3.createAdvance(E2_80, 1), ccc.compileCC(makeByte(0xA8,0xA9), it3), "LS_PS");
     103    it3.createAssign(LineBreak, it3.createOr(LineBreak, LS_PS));
    82104
    83105    //
     
    110132    //
    111133    it.createAssign(nonFinal, it.createAnd(nonFinal, u8valid));
    112 
    113     Var * const required = getOutputStreamVar("required");
     134    pb.createAssign(nonFinal, pb.createOr(nonFinal, CRLF));
     135    PabloAST * unterminatedLineAtEOF = pb.createAtEOF(pb.createAdvance(pb.createNot(LineBreak), 1), "unterminatedLineAtEOF");
     136   
     137    Var * const required = getOutputStreamVar("nonFinal");
    114138    pb.createAssign(pb.createExtract(required, pb.getInteger(0)), nonFinal);
     139    pb.createAssign(pb.createExtract(getOutputStreamVar("linebreak"), pb.getInteger(0)), pb.createOr(LineBreak, unterminatedLineAtEOF, "EOL"));
    115140}
    116141
     
    118143: PabloKernel(kb, "RequiredStreams_UTF8",
    119144// input
    120 {Binding{kb->getStreamSetTy(8), "basis"}},
    121 // output
    122 {Binding{kb->getStreamSetTy(1), "required", FixedRate()}}) {
     145{Binding{kb->getStreamSetTy(8), "basis"}, Binding{kb->getStreamSetTy(1), "lf", FixedRate(), LookAhead(1)}},
     146// output
     147{Binding{kb->getStreamSetTy(1), "nonFinal", FixedRate()},
     148 Binding{kb->getStreamSetTy(1), "linebreak", FixedRate(), Add1()}}) {
    123149
    124150}
     
    172198        Binding{b->getStreamSetTy(8), "basis"},
    173199        Binding{b->getStreamSetTy(1, 1), "linebreak"},
    174         Binding{b->getStreamSetTy(1, 1), "cr+lf"},
    175200        Binding{b->getStreamSetTy(1, 1), "required"}
    176201    };
  • icGREP/icgrep-devel/icgrep/re/re_compiler.cpp

    r5863 r5867  
    2121#include <re/re_intersect.h>        // for Intersect
    2222#include <re/re_name.h>             // for Name, Name::Type, Name::Type::Zer...
    23 #include <re/re_name_resolve.h>     // for resolveNames
    24 #include <re/re_name_gather.h>      // for gatherNames
    2523#include <re/re_rep.h>              // for Rep, Rep::::UNBOUNDED_REP
    2624#include <re/re_seq.h>              // for Seq
     
    556554inline MarkerType RE_Compiler::compileEnd(MarkerType marker, pablo::PabloBuilder & pb) {
    557555    PabloAST * const nextPos = markerVar(AdvanceMarker(marker, FinalPostPositionUnit, pb));
    558     PabloAST * const atEOL = pb.createOr(pb.createAnd(mLineBreak, nextPos), pb.createAdvance(pb.createAnd(nextPos, mCRLF), 1), "eol");
     556    PabloAST * const atEOL = pb.createAnd(mLineBreak, nextPos, "eol");
     557    //PabloAST * const atEOL = pb.createOr(pb.createAnd(mLineBreak, nextPos), pb.createAdvance(pb.createAnd(nextPos, mCRLF), 1), "eol");
    559558    return makeMarker(FinalPostPositionUnit, atEOL);
    560559}
     
    590589, mCCCompiler(ccCompiler)
    591590, mLineBreak(nullptr)
    592 , mCRLF(nullptr)
    593591, mNonFinal(nullptr)
    594592, mFinal(nullptr)
     
    599597    Var * const linebreak = mKernel->getInputStreamVar("linebreak");
    600598    mLineBreak = mPB.createExtract(linebreak, 0);
    601     Var * const crlf = mKernel->getInputStreamVar("cr+lf");
    602     mCRLF = mPB.createExtract(crlf, 0);
    603599    Var * const required = mKernel->getInputStreamVar("required");
    604600    mNonFinal = mPB.createExtract(required, 0);
  • icGREP/icgrep-devel/icgrep/re/re_compiler.h

    r5843 r5867  
    137137    cc::CC_Compiler &                               mCCCompiler;
    138138    pablo::PabloAST *                               mLineBreak;
    139     pablo::PabloAST *                               mCRLF;
    140     pablo::PabloAST *                               mInitial;
    141139    pablo::PabloAST *                               mNonFinal;
    142140    pablo::PabloAST *                               mFinal;
  • icGREP/icgrep-devel/icgrep/toolchain/grep_pipeline.cpp

    r5865 r5867  
    6363    pxDriver.makeKernelCall(sourceK, {}, {ByteStream});
    6464   
    65     StreamSetBuffer * LineFeedStream = pxDriver.addBuffer<CircularBuffer>(idb, idb->getStreamSetTy(1, 1), segmentSize);
    66     #ifdef USE_DIRECT_LF_BUILDER
    67     kernel::Kernel * linefeedK = pxDriver.addKernelInstance<kernel::LineFeedKernelBuilder>(idb, Binding{idb->getStreamSetTy(1, 8), "byteStream", FixedRate(), Principal()});
    68     pxDriver.makeKernelCall(linefeedK, {ByteStream}, {LineFeedStream});
    69     #endif
    7065
    7166    StreamSetBuffer * BasisBits = pxDriver.addBuffer<CircularBuffer>(idb, idb->getStreamSetTy(8, 1), segmentSize);
     
    7368    pxDriver.makeKernelCall(s2pk, {ByteStream}, {BasisBits});
    7469
    75     #ifndef USE_DIRECT_LF_BUILDER
     70    StreamSetBuffer * LineFeedStream = pxDriver.addBuffer<CircularBuffer>(idb, idb->getStreamSetTy(1, 1), segmentSize);
    7671    kernel::Kernel * linefeedK = pxDriver.addKernelInstance<kernel::LineFeedKernelBuilder>(idb, Binding{idb->getStreamSetTy(8), "basis", FixedRate(), Principal()});
    7772    pxDriver.makeKernelCall(linefeedK, {BasisBits}, {LineFeedStream});
    78     #endif
    7973
    80     kernel::Kernel * linebreakK = pxDriver.addKernelInstance<kernel::LineBreakKernelBuilder>(idb, 8);
    8174    StreamSetBuffer * LineBreakStream = pxDriver.addBuffer<CircularBuffer>(idb, idb->getStreamSetTy(1, 1), segmentSize);
    82     StreamSetBuffer * CRLFStream = pxDriver.addBuffer<CircularBuffer>(idb, idb->getStreamSetTy(1, 1), segmentSize);
    83     pxDriver.makeKernelCall(linebreakK, {BasisBits, LineFeedStream}, {LineBreakStream, CRLFStream});
    8475   
    8576    kernel::Kernel * requiredStreamsK = pxDriver.addKernelInstance<kernel::RequiredStreams_UTF8>(idb);
    86     StreamSetBuffer * RequiredStreams = pxDriver.addBuffer<CircularBuffer>(idb, idb->getStreamSetTy(3, 1), segmentSize);
    87     pxDriver.makeKernelCall(requiredStreamsK, {BasisBits}, {RequiredStreams});
     77    StreamSetBuffer * RequiredStreams = pxDriver.addBuffer<CircularBuffer>(idb, idb->getStreamSetTy(2, 1), segmentSize);
     78    pxDriver.makeKernelCall(requiredStreamsK, {BasisBits, LineFeedStream}, {RequiredStreams, LineBreakStream});
    8879   
    8980    StreamSetBuffer * MatchResults = pxDriver.addBuffer<CircularBuffer>(idb, idb->getStreamSetTy(1, 1), segmentSize);
    9081    kernel::Kernel * icgrepK = pxDriver.addKernelInstance<kernel::ICGrepKernel>(idb, pattern);
    91     pxDriver.makeKernelCall(icgrepK, {BasisBits, LineBreakStream, CRLFStream, RequiredStreams}, {MatchResults});
     82    pxDriver.makeKernelCall(icgrepK, {BasisBits, LineBreakStream, RequiredStreams}, {MatchResults});
    9283   
    9384    StreamSetBuffer * MatchedLines = pxDriver.addBuffer<CircularBuffer>(idb, idb->getStreamSetTy(1, 1), segmentSize);
Note: See TracChangeset for help on using the changeset viewer.