source: icGREP/icgrep-devel/icgrep/kernels/linebreak_kernel.cpp @ 5822

Last change on this file since 5822 was 5797, checked in by cameron, 16 months ago

makeByte uses the Byte alphabet

File size: 3.9 KB
Line 
1/*
2 *  Copyright (c) 2016 International Characters.
3 *  This software is licensed to the public under the Open Software License 3.0.
4 */
5
6#include "linebreak_kernel.h"
7#include <re/re_cc.h>
8#include <re/re_toolchain.h>
9#include <pablo/pe_ones.h>          // for Ones
10#include <pablo/pe_var.h>           // for Var
11#include <pablo/pe_zeroes.h>        // for Zeroes
12#include <cc/cc_compiler.h>
13#include <pablo/builder.hpp>
14#include <kernels/kernel_builder.h>
15
16#include <llvm/Support/raw_ostream.h>
17
18using namespace cc;
19using namespace kernel;
20using namespace pablo;
21using namespace re;
22using namespace llvm;
23
24
25LineFeedKernelBuilder::LineFeedKernelBuilder(const std::unique_ptr<kernel::KernelBuilder> & b, const unsigned basisBitsCount)
26: PabloKernel(b, "lf" + std::to_string(basisBitsCount),
27// input
28{Binding{b->getStreamSetTy(basisBitsCount), "basis", FixedRate(), Principal()}},
29// output
30{Binding{b->getStreamSetTy(1), "lf"}}) {
31
32}
33
34void LineFeedKernelBuilder::generatePabloMethod() {
35    CC_Compiler ccc(this, getInput(0));
36    auto & pb = ccc.getBuilder();
37    PabloAST * LF = ccc.compileCC("LF", makeByte(0x0A), pb);
38    pb.createAssign(pb.createExtract(getOutput(0), pb.getInteger(0)), LF);
39}
40
41LineBreakKernelBuilder::LineBreakKernelBuilder(const std::unique_ptr<kernel::KernelBuilder> & b, const unsigned basisBitsCount)
42: PabloKernel(b, "lb" + std::to_string(basisBitsCount),
43// inputs
44{Binding{b->getStreamSetTy(basisBitsCount), "basis", FixedRate(), Principal()}
45,Binding{b->getStreamSetTy(1), "lf", FixedRate(), LookAhead(1)}},
46// outputs
47{Binding{b->getStreamSetTy(1), "linebreak", FixedRate(), Add1()}
48,Binding{b->getStreamSetTy(1), "cr+lf"}}) {
49
50}
51
52void LineBreakKernelBuilder::generatePabloMethod() {
53    CC_Compiler ccc(this, getInput(0));
54    auto & pb = ccc.getBuilder();
55
56    Integer * const ZERO = pb.getInteger(0);
57
58    PabloAST * const LF = pb.createExtract(getInput(1), ZERO, "LF");
59    PabloAST * const CR = ccc.compileCC(makeByte(0x0D));
60    PabloAST * const LF_VT_FF_CR = ccc.compileCC("LF,VT,FF,CR", makeByte(0x0A, 0x0D), pb);
61    Var * const LineBreak = pb.createVar("LineBreak", LF_VT_FF_CR);
62
63    // Remove the CR of any CR+LF
64    Var * const CRLF = pb.createVar("CRLF", pb.createZeroes());
65    PabloBuilder crb = PabloBuilder::Create(pb);
66    pb.createIf(CR, crb);
67    PabloAST * const lookaheadLF = crb.createLookahead(LF, 1, "lookaheadLF");
68    PabloAST * const crlf = crb.createAnd(CR, lookaheadLF);
69    crb.createAssign(CRLF, crlf);
70    PabloAST * removedCRLF = crb.createAnd(LineBreak, crb.createNot(CRLF));
71    crb.createAssign(LineBreak, removedCRLF);
72    // Record the CR marker of any CR+LF
73    pb.createAssign(pb.createExtract(getOutput(1), ZERO), CRLF);
74
75    // Check for Unicode Line Breaks
76    PabloAST * u8pfx = ccc.compileCC(makeByte(0xC0, 0xFF));
77    PabloBuilder it = PabloBuilder::Create(pb);
78    pb.createIf(u8pfx, it);
79    PabloAST * u8pfx2 = ccc.compileCC(makeByte(0xC2, 0xDF), it);
80    PabloAST * u8pfx3 = ccc.compileCC(makeByte(0xE0, 0xEF), it);
81
82    // Two-byte sequences
83    PabloBuilder it2 = PabloBuilder::Create(it);
84    it.createIf(u8pfx2, it2);
85    PabloAST * NEL = it2.createAnd(it2.createAdvance(ccc.compileCC(makeByte(0xC2), it2), 1), ccc.compileCC(makeByte(0x85), it2), "NEL");
86    it2.createAssign(LineBreak, it2.createOr(LineBreak, NEL));
87
88    // Three-byte sequences
89    PabloBuilder it3 = PabloBuilder::Create(it);
90    it.createIf(u8pfx3, it3);
91    PabloAST * E2_80 = it3.createAnd(it3.createAdvance(ccc.compileCC(makeByte(0xE2), it3), 1), ccc.compileCC(makeByte(0x80), it3));
92    PabloAST * LS_PS = it3.createAnd(it3.createAdvance(E2_80, 1), ccc.compileCC(makeByte(0xA8,0xA9), it3), "LS_PS");
93    it3.createAssign(LineBreak, it3.createOr(LineBreak, LS_PS));
94
95    PabloAST * unterminatedLineAtEOF = pb.createAtEOF(pb.createAdvance(pb.createNot(LineBreak), 1), "unterminatedLineAtEOF");
96    pb.createAssign(pb.createExtract(getOutput(0), ZERO), pb.createOr(LineBreak, unterminatedLineAtEOF, "EOL"));
97}
Note: See TracBrowser for help on using the repository browser.