Ignore:
Timestamp:
Feb 8, 2018, 9:32:11 PM (16 months ago)
Author:
cameron
Message:

Fold UnicodeLineBreak into Required_Streams_UTF8; CRLF into nonFinal

File:
1 edited

Legend:

Unmodified
Added
Removed
  • icGREP/icgrep-devel/icgrep/kernels/grep_kernel.cpp

    r5863 r5867  
    4343    PabloBuilder pb(getEntryScope());
    4444    cc::Parabix_CC_Compiler ccc(this, getInputStreamSet("basis"));
     45   
     46    PabloAST * const LF = pb.createExtract(getInput(1), pb.getInteger(0), "LF");
     47    PabloAST * const CR = ccc.compileCC(makeByte(0x0D));
     48    PabloAST * const LF_VT_FF_CR = ccc.compileCC("LF,VT,FF,CR", makeByte(0x0A, 0x0D), pb);
     49    Var * const LineBreak = pb.createVar("LineBreak", LF_VT_FF_CR);
     50   
     51    // Remove the CR of any CR+LF
     52    Var * const CRLF = pb.createVar("CRLF", pb.createZeroes());
     53    auto crb = pb.createScope();
     54    pb.createIf(CR, crb);
     55    PabloAST * const lookaheadLF = crb.createLookahead(LF, 1, "lookaheadLF");
     56    PabloAST * const crlf = crb.createAnd(CR, lookaheadLF);
     57    crb.createAssign(CRLF, crlf);
     58    PabloAST * removedCRLF = crb.createAnd(LineBreak, crb.createNot(CRLF));
     59    crb.createAssign(LineBreak, removedCRLF);
     60
     61   
    4562    Zeroes * const ZEROES = pb.createZeroes();
    4663    PabloAST * const u8pfx = ccc.compileCC(makeByte(0xC0, 0xFF));
     
    6481    it.createIf(u8pfx2, it2);
    6582    it2.createAssign(anyscope, it2.createAdvance(u8pfx2, 1));
     83    PabloAST * NEL = it2.createAnd(it2.createAdvance(ccc.compileCC(makeByte(0xC2), it2), 1), ccc.compileCC(makeByte(0x85), it2), "NEL");
     84    it2.createAssign(LineBreak, it2.createOr(LineBreak, NEL));
     85
    6686
    6787    //
     
    7999    PabloAST * const EX_invalid = it3.createOr(E0_invalid, ED_invalid);
    80100    it3.createAssign(EF_invalid, EX_invalid);
    81 
     101    PabloAST * E2_80 = it3.createAnd(it3.createAdvance(ccc.compileCC(makeByte(0xE2), it3), 1), ccc.compileCC(makeByte(0x80), it3));
     102    PabloAST * LS_PS = it3.createAnd(it3.createAdvance(E2_80, 1), ccc.compileCC(makeByte(0xA8,0xA9), it3), "LS_PS");
     103    it3.createAssign(LineBreak, it3.createOr(LineBreak, LS_PS));
    82104
    83105    //
     
    110132    //
    111133    it.createAssign(nonFinal, it.createAnd(nonFinal, u8valid));
    112 
    113     Var * const required = getOutputStreamVar("required");
     134    pb.createAssign(nonFinal, pb.createOr(nonFinal, CRLF));
     135    PabloAST * unterminatedLineAtEOF = pb.createAtEOF(pb.createAdvance(pb.createNot(LineBreak), 1), "unterminatedLineAtEOF");
     136   
     137    Var * const required = getOutputStreamVar("nonFinal");
    114138    pb.createAssign(pb.createExtract(required, pb.getInteger(0)), nonFinal);
     139    pb.createAssign(pb.createExtract(getOutputStreamVar("linebreak"), pb.getInteger(0)), pb.createOr(LineBreak, unterminatedLineAtEOF, "EOL"));
    115140}
    116141
     
    118143: PabloKernel(kb, "RequiredStreams_UTF8",
    119144// input
    120 {Binding{kb->getStreamSetTy(8), "basis"}},
    121 // output
    122 {Binding{kb->getStreamSetTy(1), "required", FixedRate()}}) {
     145{Binding{kb->getStreamSetTy(8), "basis"}, Binding{kb->getStreamSetTy(1), "lf", FixedRate(), LookAhead(1)}},
     146// output
     147{Binding{kb->getStreamSetTy(1), "nonFinal", FixedRate()},
     148 Binding{kb->getStreamSetTy(1), "linebreak", FixedRate(), Add1()}}) {
    123149
    124150}
     
    172198        Binding{b->getStreamSetTy(8), "basis"},
    173199        Binding{b->getStreamSetTy(1, 1), "linebreak"},
    174         Binding{b->getStreamSetTy(1, 1), "cr+lf"},
    175200        Binding{b->getStreamSetTy(1, 1), "required"}
    176201    };
Note: See TracChangeset for help on using the changeset viewer.