source: icGREP/icgrep-devel/icgrep/kernels/linebreak_kernel.cpp @ 5540

Last change on this file since 5540 was 5436, checked in by nmedfort, 2 years ago

Continued refactoring work. PabloKernel is now an abstract base type with a 'generatePabloMethod' hook to generate Pablo code.

File size: 3.2 KB
RevLine 
[5358]1/*
2 *  Copyright (c) 2016 International Characters.
3 *  This software is licensed to the public under the Open Software License 3.0.
4 */
5
6#include "linebreak_kernel.h"
7#include <re/re_cc.h>
8#include <re/re_toolchain.h>
9#include <pablo/pe_ones.h>          // for Ones
10#include <pablo/pe_var.h>           // for Var
11#include <pablo/pe_zeroes.h>        // for Zeroes
12#include <cc/cc_compiler.h>
13#include <pablo/builder.hpp>
[5436]14#include <kernels/kernel_builder.h>
[5358]15
16using namespace cc;
17using namespace kernel;
18using namespace pablo;
19using namespace re;
20using namespace llvm;
21
[5436]22LineBreakKernelBuilder::LineBreakKernelBuilder(const std::unique_ptr<kernel::KernelBuilder> & b, unsigned basisBitsCount)
[5435]23: PabloKernel(b, "lb", {Binding{b->getStreamSetTy(basisBitsCount), "basis"}}, {Binding{b->getStreamSetTy(1), "linebreak", Add1()}}) {
[5358]24
[5435]25}
26
[5436]27void LineBreakKernelBuilder::generatePabloMethod() {
[5435]28
[5358]29    CC_Compiler ccc(this, getInput(0));
[5435]30    auto & pb = ccc.getBuilder();
31
[5370]32    PabloAST * LineBreak = nullptr;
[5435]33    PabloAST * LF = ccc.compileCC("LF", makeCC(0x0A), pb);
[5358]34    PabloAST * CR = ccc.compileCC(makeCC(0x0D));
35    PabloAST * LF_VT_FF_CR = ccc.compileCC(makeCC(0x0A, 0x0D));
36
[5435]37    Zeroes * const zero = pb.createZeroes();
38    Var * crlf = pb.createVar("crlf", zero);
39    PabloBuilder crb = PabloBuilder::Create(pb);
[5385]40#ifndef USE_LOOKAHEAD_CRLF
41    PabloAST * cr1 = crb.createAdvance(CR, 1, "cr1");
42    crb.createAssign(crlf, crb.createAnd(cr1, LF));
43#else
[5383]44    PabloAST * lookaheadLF = crb.createLookahead(LF, 1, "lookaheadLF");
45    crb.createAssign(crlf, crb.createAnd(CR, lookaheadLF));
[5385]46#endif
[5435]47    pb.createIf(CR, crb);
[5358]48
[5435]49    Var * NEL_LS_PS = pb.createVar("NEL_LS_PS", zero);
50
[5358]51    PabloAST * u8pfx = ccc.compileCC(makeCC(0xC0, 0xFF));
[5435]52    PabloBuilder it = PabloBuilder::Create(pb);
53    pb.createIf(u8pfx, it);
[5358]54    PabloAST * u8pfx2 = ccc.compileCC(makeCC(0xC2, 0xDF), it);
55    PabloAST * u8pfx3 = ccc.compileCC(makeCC(0xE0, 0xEF), it);
56
57    //
58    // Two-byte sequences
59    Var * NEL = it.createVar("NEL", zero);
60    PabloBuilder it2 = PabloBuilder::Create(it);
61    it2.createAssign(NEL, it2.createAnd(it2.createAdvance(ccc.compileCC(makeCC(0xC2), it2), 1), ccc.compileCC(makeCC(0x85), it2)));
62    it.createIf(u8pfx2, it2);
63
64    //
65    // Three-byte sequences
66    Var * LS_PS = it.createVar("LS_PS", zero);
67    PabloBuilder it3 = PabloBuilder::Create(it);
68    it.createIf(u8pfx3, it3);
69    PabloAST * E2_80 = it3.createAnd(it3.createAdvance(ccc.compileCC(makeCC(0xE2), it3), 1), ccc.compileCC(makeCC(0x80), it3));
70    it3.createAssign(LS_PS, it3.createAnd(it3.createAdvance(E2_80, 1), ccc.compileCC(makeCC(0xA8,0xA9), it3)));
71    it.createAssign(NEL_LS_PS, it.createOr(NEL, LS_PS));
72
[5435]73    PabloAST * LB_chars = pb.createOr(LF_VT_FF_CR, NEL_LS_PS);
[5436]74    PabloAST * lb = nullptr;
75    if (AlgorithmOptionIsSet(DisableUnicodeLineBreak)) {
76        lb = LF;
77    } else {
78        lb = pb.createAnd(LB_chars, pb.createNot(crlf));  // count the CR, but not CRLF
79    }
[5358]80
[5435]81    PabloAST * unterminatedLineAtEOF = pb.createAtEOF(pb.createAdvance(pb.createNot(LB_chars), 1));
82    LineBreak = pb.createOr(lb, unterminatedLineAtEOF);
83    PabloAST * const r = pb.createExtract(getOutput(0), pb.getInteger(0));
84    pb.createAssign(r, LineBreak);
[5385]85#ifdef USE_LOOKAHEAD_CRLF
[5383]86    setLookAhead(1);
[5385]87#endif
[5358]88}
Note: See TracBrowser for help on using the repository browser.