Ignore:
Timestamp:
Dec 15, 2017, 12:44:01 PM (20 months ago)
Author:
nmedfort
Message:

Initial check-in of LookAhead support; modified LineBreakKernel to compute CR+LF using LookAhead(1) + misc. fixes.

File:
1 edited

Legend:

Unmodified
Added
Removed
  • icGREP/icgrep-devel/icgrep/kernels/linebreak_kernel.cpp

    r5755 r5782  
    1414#include <kernels/kernel_builder.h>
    1515
     16#include <llvm/Support/raw_ostream.h>
     17
    1618using namespace cc;
    1719using namespace kernel;
     
    2022using namespace llvm;
    2123
    22 LineBreakKernelBuilder::LineBreakKernelBuilder(const std::unique_ptr<kernel::KernelBuilder> & b, unsigned basisBitsCount)
    23 : PabloKernel(b, "lb",
    24     {Binding{b->getStreamSetTy(basisBitsCount), "basis", FixedRate(), Principal()}},
    25     {Binding{b->getStreamSetTy(1), "linebreak", FixedRate(), Add1()}}) {
     24
     25LineFeedKernelBuilder::LineFeedKernelBuilder(const std::unique_ptr<kernel::KernelBuilder> & b, const unsigned basisBitsCount)
     26: PabloKernel(b, "lf" + std::to_string(basisBitsCount),
     27// input
     28{Binding{b->getStreamSetTy(basisBitsCount), "basis", FixedRate(), Principal()}},
     29// output
     30{Binding{b->getStreamSetTy(1), "lf"}}) {
     31
     32}
     33
     34void LineFeedKernelBuilder::generatePabloMethod() {
     35    CC_Compiler ccc(this, getInput(0));
     36    auto & pb = ccc.getBuilder();
     37    PabloAST * LF = ccc.compileCC("LF", makeCC(0x0A), pb);
     38    pb.createAssign(pb.createExtract(getOutput(0), pb.getInteger(0)), LF);
     39}
     40
     41LineBreakKernelBuilder::LineBreakKernelBuilder(const std::unique_ptr<kernel::KernelBuilder> & b, const unsigned basisBitsCount)
     42: PabloKernel(b, "lb" + std::to_string(basisBitsCount),
     43// inputs
     44{Binding{b->getStreamSetTy(basisBitsCount), "basis", FixedRate(), Principal()}
     45,Binding{b->getStreamSetTy(1), "lf", FixedRate(), LookAhead(1)}},
     46// outputs
     47{Binding{b->getStreamSetTy(1), "linebreak", FixedRate(), Add1()}
     48,Binding{b->getStreamSetTy(1), "cr+lf"}}) {
    2649
    2750}
    2851
    2952void LineBreakKernelBuilder::generatePabloMethod() {
    30 
    3153    CC_Compiler ccc(this, getInput(0));
    3254    auto & pb = ccc.getBuilder();
    3355
    34     PabloAST * LineBreak = nullptr;
    35     PabloAST * LF = ccc.compileCC("LF", makeCC(0x0A), pb);
    36     PabloAST * CR = ccc.compileCC(makeCC(0x0D));
    37     PabloAST * LF_VT_FF_CR = ccc.compileCC(makeCC(0x0A, 0x0D));
     56    Integer * const ZERO = pb.getInteger(0);
    3857
    39     Zeroes * const zero = pb.createZeroes();
    40     Var * crlf = pb.createVar("crlf", zero);
     58    PabloAST * const LF = pb.createExtract(getInput(1), ZERO, "LF");
     59    PabloAST * const CR = ccc.compileCC(makeCC(0x0D));
     60    PabloAST * const LF_VT_FF_CR = ccc.compileCC(makeCC(0x0A, 0x0D));
     61    Var * const LineBreak = pb.createVar("LineBreak", LF_VT_FF_CR);
     62
     63    // Remove the CR of any CR+LF
     64    Var * const CRLF = pb.createVar("CRLF", pb.createZeroes());
    4165    PabloBuilder crb = PabloBuilder::Create(pb);
    42 #ifndef USE_LOOKAHEAD_CRLF
    43     PabloAST * cr1 = crb.createAdvance(CR, 1, "cr1");
    44     crb.createAssign(crlf, crb.createAnd(cr1, LF));
    45 #else
    46     PabloAST * lookaheadLF = crb.createLookahead(LF, 1, "lookaheadLF");
    47     crb.createAssign(crlf, crb.createAnd(CR, lookaheadLF));
    48 #endif
    4966    pb.createIf(CR, crb);
     67    PabloAST * const lookaheadLF = crb.createLookahead(LF, 1, "lookaheadLF");
     68    PabloAST * const crlf = crb.createAnd(CR, lookaheadLF);
     69    crb.createAssign(CRLF, crlf);
     70    PabloAST * removedCRLF = crb.createAnd(LineBreak, crb.createNot(CRLF));
     71    crb.createAssign(LineBreak, removedCRLF);
     72    // Record the CR marker of any CR+LF
     73    pb.createAssign(pb.createExtract(getOutput(1), ZERO), CRLF);
    5074
    51     Var * NEL_LS_PS = pb.createVar("NEL_LS_PS", zero);
    52 
     75    // Check for Unicode Line Breaks
    5376    PabloAST * u8pfx = ccc.compileCC(makeCC(0xC0, 0xFF));
    5477    PabloBuilder it = PabloBuilder::Create(pb);
     
    5780    PabloAST * u8pfx3 = ccc.compileCC(makeCC(0xE0, 0xEF), it);
    5881
    59     //
    6082    // Two-byte sequences
    61     Var * NEL = it.createVar("NEL", zero);
    6283    PabloBuilder it2 = PabloBuilder::Create(it);
    63     it2.createAssign(NEL, it2.createAnd(it2.createAdvance(ccc.compileCC(makeCC(0xC2), it2), 1), ccc.compileCC(makeCC(0x85), it2)));
    6484    it.createIf(u8pfx2, it2);
     85    PabloAST * NEL = it2.createAnd(it2.createAdvance(ccc.compileCC(makeCC(0xC2), it2), 1), ccc.compileCC(makeCC(0x85), it2), "NEL");
     86    it2.createAssign(LineBreak, it2.createOr(LineBreak, NEL));
    6587
    66     //
    6788    // Three-byte sequences
    68     Var * LS_PS = it.createVar("LS_PS", zero);
    6989    PabloBuilder it3 = PabloBuilder::Create(it);
    7090    it.createIf(u8pfx3, it3);
    7191    PabloAST * E2_80 = it3.createAnd(it3.createAdvance(ccc.compileCC(makeCC(0xE2), it3), 1), ccc.compileCC(makeCC(0x80), it3));
    72     it3.createAssign(LS_PS, it3.createAnd(it3.createAdvance(E2_80, 1), ccc.compileCC(makeCC(0xA8,0xA9), it3)));
    73     it.createAssign(NEL_LS_PS, it.createOr(NEL, LS_PS));
     92    PabloAST * LS_PS = it3.createAnd(it3.createAdvance(E2_80, 1), ccc.compileCC(makeCC(0xA8,0xA9), it3), "LS_PS");
     93    it3.createAssign(LineBreak, it3.createOr(LineBreak, LS_PS));
    7494
    75     PabloAST * LB_chars = pb.createOr(LF_VT_FF_CR, NEL_LS_PS);
    76     PabloAST * lb = nullptr;
    77     if (AlgorithmOptionIsSet(DisableUnicodeLineBreak)) {
    78         lb = LF;
    79     } else {
    80         lb = pb.createAnd(LB_chars, pb.createNot(crlf));  // count the CR, but not CRLF
    81     }
    82 
    83     PabloAST * unterminatedLineAtEOF = pb.createAtEOF(pb.createAdvance(pb.createNot(LB_chars), 1));
    84     LineBreak = pb.createOr(lb, unterminatedLineAtEOF);
    85     PabloAST * const r = pb.createExtract(getOutput(0), pb.getInteger(0));
    86     pb.createAssign(r, LineBreak);
    87 #ifdef USE_LOOKAHEAD_CRLF
    88     setLookAhead(1);
    89 #endif
     95    PabloAST * unterminatedLineAtEOF = pb.createAtEOF(pb.createAdvance(pb.createNot(LineBreak), 1));
     96    pb.createAssign(pb.createExtract(getOutput(0), ZERO), pb.createOr(LineBreak, unterminatedLineAtEOF, "EOL"));
    9097}
Note: See TracChangeset for help on using the changeset viewer.