source: icGREP/icgrep-devel/icgrep/kernels/linebreak_kernel.cpp @ 5824

Last change on this file since 5824 was 5824, checked in by cameron, 16 months ago

Test case for compileCCfromCodeUnitStream using DirectLineFeedBuilder

File size: 4.4 KB
Line 
1/*
2 *  Copyright (c) 2016 International Characters.
3 *  This software is licensed to the public under the Open Software License 3.0.
4 */
5
6#include "linebreak_kernel.h"
7#include <re/re_cc.h>
8#include <re/re_toolchain.h>
9#include <pablo/pe_ones.h>          // for Ones
10#include <pablo/pe_var.h>           // for Var
11#include <pablo/pe_zeroes.h>        // for Zeroes
12#include <cc/cc_compiler.h>
13#include <pablo/builder.hpp>
14#include <kernels/kernel_builder.h>
15
16#include <llvm/Support/raw_ostream.h>
17
18using namespace cc;
19using namespace kernel;
20using namespace pablo;
21using namespace re;
22using namespace llvm;
23
24
25DirectLineFeedBuilder::DirectLineFeedBuilder(const std::unique_ptr<kernel::KernelBuilder> & b)
26: PabloKernel(b, "lf_byte",
27// input
28{Binding{b->getStreamSetTy(1, 8), "codeUnitStream", FixedRate(), Principal()}},
29// output
30{Binding{b->getStreamSetTy(1), "lf"}}) {
31
32}
33
34void DirectLineFeedBuilder::generatePabloMethod() {
35    PabloBuilder pb(getEntryBlock());
36    PabloAST * LF = compileCCfromCodeUnitStream(makeByte(0x0A), getInput(0), pb);
37    pb.createAssign(pb.createExtract(getOutput(0), pb.getInteger(0)), LF);
38}
39
40
41LineFeedKernelBuilder::LineFeedKernelBuilder(const std::unique_ptr<kernel::KernelBuilder> & b, const unsigned basisBitsCount)
42: PabloKernel(b, "lf" + std::to_string(basisBitsCount),
43// input
44{Binding{b->getStreamSetTy(basisBitsCount), "basis", FixedRate(), Principal()}},
45// output
46{Binding{b->getStreamSetTy(1), "lf"}}) {
47
48}
49
50void LineFeedKernelBuilder::generatePabloMethod() {
51    CC_Compiler ccc(this, getInput(0));
52    auto & pb = ccc.getBuilder();
53    PabloAST * LF = ccc.compileCC("LF", makeByte(0x0A), pb);
54    pb.createAssign(pb.createExtract(getOutput(0), pb.getInteger(0)), LF);
55}
56
57LineBreakKernelBuilder::LineBreakKernelBuilder(const std::unique_ptr<kernel::KernelBuilder> & b, const unsigned basisBitsCount)
58: PabloKernel(b, "lb" + std::to_string(basisBitsCount),
59// inputs
60{Binding{b->getStreamSetTy(basisBitsCount), "basis", FixedRate(), Principal()}
61,Binding{b->getStreamSetTy(1), "lf", FixedRate(), LookAhead(1)}},
62// outputs
63{Binding{b->getStreamSetTy(1), "linebreak", FixedRate(), Add1()}
64,Binding{b->getStreamSetTy(1), "cr+lf"}}) {
65
66}
67
68void LineBreakKernelBuilder::generatePabloMethod() {
69    CC_Compiler ccc(this, getInput(0));
70    auto & pb = ccc.getBuilder();
71
72    Integer * const ZERO = pb.getInteger(0);
73
74    PabloAST * const LF = pb.createExtract(getInput(1), ZERO, "LF");
75    PabloAST * const CR = ccc.compileCC(makeByte(0x0D));
76    PabloAST * const LF_VT_FF_CR = ccc.compileCC("LF,VT,FF,CR", makeByte(0x0A, 0x0D), pb);
77    Var * const LineBreak = pb.createVar("LineBreak", LF_VT_FF_CR);
78
79    // Remove the CR of any CR+LF
80    Var * const CRLF = pb.createVar("CRLF", pb.createZeroes());
81    PabloBuilder crb = PabloBuilder::Create(pb);
82    pb.createIf(CR, crb);
83    PabloAST * const lookaheadLF = crb.createLookahead(LF, 1, "lookaheadLF");
84    PabloAST * const crlf = crb.createAnd(CR, lookaheadLF);
85    crb.createAssign(CRLF, crlf);
86    PabloAST * removedCRLF = crb.createAnd(LineBreak, crb.createNot(CRLF));
87    crb.createAssign(LineBreak, removedCRLF);
88    // Record the CR marker of any CR+LF
89    pb.createAssign(pb.createExtract(getOutput(1), ZERO), CRLF);
90
91    // Check for Unicode Line Breaks
92    PabloAST * u8pfx = ccc.compileCC(makeByte(0xC0, 0xFF));
93    PabloBuilder it = PabloBuilder::Create(pb);
94    pb.createIf(u8pfx, it);
95    PabloAST * u8pfx2 = ccc.compileCC(makeByte(0xC2, 0xDF), it);
96    PabloAST * u8pfx3 = ccc.compileCC(makeByte(0xE0, 0xEF), it);
97
98    // Two-byte sequences
99    PabloBuilder it2 = PabloBuilder::Create(it);
100    it.createIf(u8pfx2, it2);
101    PabloAST * NEL = it2.createAnd(it2.createAdvance(ccc.compileCC(makeByte(0xC2), it2), 1), ccc.compileCC(makeByte(0x85), it2), "NEL");
102    it2.createAssign(LineBreak, it2.createOr(LineBreak, NEL));
103
104    // Three-byte sequences
105    PabloBuilder it3 = PabloBuilder::Create(it);
106    it.createIf(u8pfx3, it3);
107    PabloAST * E2_80 = it3.createAnd(it3.createAdvance(ccc.compileCC(makeByte(0xE2), it3), 1), ccc.compileCC(makeByte(0x80), it3));
108    PabloAST * LS_PS = it3.createAnd(it3.createAdvance(E2_80, 1), ccc.compileCC(makeByte(0xA8,0xA9), it3), "LS_PS");
109    it3.createAssign(LineBreak, it3.createOr(LineBreak, LS_PS));
110
111    PabloAST * unterminatedLineAtEOF = pb.createAtEOF(pb.createAdvance(pb.createNot(LineBreak), 1), "unterminatedLineAtEOF");
112    pb.createAssign(pb.createExtract(getOutput(0), ZERO), pb.createOr(LineBreak, unterminatedLineAtEOF, "EOL"));
113}
Note: See TracBrowser for help on using the repository browser.