source: icGREP/icgrep-devel/icgrep/kernels/linebreak_kernel.cpp @ 5843

Last change on this file since 5843 was 5843, checked in by cameron, 15 months ago

CC Compiler refactoring step

File size: 4.1 KB
Line 
1/*
2 *  Copyright (c) 2016 International Characters.
3 *  This software is licensed to the public under the Open Software License 3.0.
4 */
5
6#include "linebreak_kernel.h"
7#include <re/re_cc.h>
8#include <re/re_toolchain.h>
9#include <pablo/pe_ones.h>          // for Ones
10#include <pablo/pe_var.h>           // for Var
11#include <pablo/pe_zeroes.h>        // for Zeroes
12#include <cc/cc_compiler.h>
13#include <pablo/builder.hpp>
14#include <kernels/kernel_builder.h>
15
16#include <llvm/Support/raw_ostream.h>
17
18using namespace cc;
19using namespace kernel;
20using namespace pablo;
21using namespace re;
22using namespace llvm;
23
24LineFeedKernelBuilder::LineFeedKernelBuilder(const std::unique_ptr<kernel::KernelBuilder> & b, const unsigned codeUnitWidth)
25: PabloKernel(b, "lf" + std::to_string(codeUnitWidth),
26// input
27#ifdef USE_DIRECT_LF_BUILDER
28{Binding{b->getStreamSetTy(1, codeUnitWidth), "byteStream", FixedRate(), Principal()}},
29#else
30{Binding{b->getStreamSetTy(codeUnitWidth), "basis", FixedRate(), Principal()}},
31#endif
32// output
33{Binding{b->getStreamSetTy(1), "lf"}}) {
34
35}
36
37void LineFeedKernelBuilder::generatePabloMethod() {
38    PabloBuilder pb(getEntryScope());
39    #ifdef USE_DIRECT_LF_BUILDER
40    PabloAST * LF = compileCCfromCodeUnitStream(makeByte(0x0A), getInput(0), pb);
41    #else
42    cc::CC_Compiler ccc(this, getInputStreamSet("basis"));
43    PabloAST * LF = ccc.compileCC("LF", makeByte(0x0A), pb);
44    #endif
45    pb.createAssign(pb.createExtract(getOutput(0), 0), LF);
46}
47
48LineBreakKernelBuilder::LineBreakKernelBuilder(const std::unique_ptr<kernel::KernelBuilder> & b, const unsigned basisBitsCount)
49: PabloKernel(b, "lb" + std::to_string(basisBitsCount),
50// inputs
51{Binding{b->getStreamSetTy(basisBitsCount), "basis", FixedRate(), Principal()}
52,Binding{b->getStreamSetTy(1), "lf", FixedRate(), LookAhead(1)}},
53// outputs
54{Binding{b->getStreamSetTy(1), "linebreak", FixedRate(), Add1()}
55,Binding{b->getStreamSetTy(1), "cr+lf"}}) {
56
57}
58
59void LineBreakKernelBuilder::generatePabloMethod() {
60    PabloBuilder pb(getEntryScope());
61    cc::CC_Compiler ccc(this, getInputStreamSet("basis"));
62
63    Integer * const ZERO = pb.getInteger(0);
64
65    PabloAST * const LF = pb.createExtract(getInput(1), ZERO, "LF");
66    PabloAST * const CR = ccc.compileCC(makeByte(0x0D));
67    PabloAST * const LF_VT_FF_CR = ccc.compileCC("LF,VT,FF,CR", makeByte(0x0A, 0x0D), pb);
68    Var * const LineBreak = pb.createVar("LineBreak", LF_VT_FF_CR);
69
70    // Remove the CR of any CR+LF
71    Var * const CRLF = pb.createVar("CRLF", pb.createZeroes());
72    auto crb = pb.createScope();
73    pb.createIf(CR, crb);
74    PabloAST * const lookaheadLF = crb.createLookahead(LF, 1, "lookaheadLF");
75    PabloAST * const crlf = crb.createAnd(CR, lookaheadLF);
76    crb.createAssign(CRLF, crlf);
77    PabloAST * removedCRLF = crb.createAnd(LineBreak, crb.createNot(CRLF));
78    crb.createAssign(LineBreak, removedCRLF);
79    // Record the CR marker of any CR+LF
80    pb.createAssign(pb.createExtract(getOutput(1), ZERO), CRLF);
81
82    // Check for Unicode Line Breaks
83    PabloAST * u8pfx = ccc.compileCC(makeByte(0xC0, 0xFF));
84    auto it = pb.createScope();
85    pb.createIf(u8pfx, it);
86    PabloAST * u8pfx2 = ccc.compileCC(makeByte(0xC2, 0xDF), it);
87    PabloAST * u8pfx3 = ccc.compileCC(makeByte(0xE0, 0xEF), it);
88
89    // Two-byte sequences
90    auto it2 = it.createScope();
91    it.createIf(u8pfx2, it2);
92    PabloAST * NEL = it2.createAnd(it2.createAdvance(ccc.compileCC(makeByte(0xC2), it2), 1), ccc.compileCC(makeByte(0x85), it2), "NEL");
93    it2.createAssign(LineBreak, it2.createOr(LineBreak, NEL));
94
95    // Three-byte sequences
96    auto it3 = it.createScope();
97    it.createIf(u8pfx3, it3);
98    PabloAST * E2_80 = it3.createAnd(it3.createAdvance(ccc.compileCC(makeByte(0xE2), it3), 1), ccc.compileCC(makeByte(0x80), it3));
99    PabloAST * LS_PS = it3.createAnd(it3.createAdvance(E2_80, 1), ccc.compileCC(makeByte(0xA8,0xA9), it3), "LS_PS");
100    it3.createAssign(LineBreak, it3.createOr(LineBreak, LS_PS));
101
102    PabloAST * unterminatedLineAtEOF = pb.createAtEOF(pb.createAdvance(pb.createNot(LineBreak), 1), "unterminatedLineAtEOF");
103    pb.createAssign(pb.createExtract(getOutput(0), ZERO), pb.createOr(LineBreak, unterminatedLineAtEOF, "EOL"));
104}
Note: See TracBrowser for help on using the repository browser.