source: icGREP/icgrep-devel/icgrep/kernels/linebreak_kernel.cpp

Last change on this file was 6184, checked in by nmedfort, 9 months ago

Initial version of PipelineKernel + revised StreamSet model.

File size: 4.4 KB
Line 
1/*
2 *  Copyright (c) 2018 International Characters.
3 *  This software is licensed to the public under the Open Software License 3.0.
4 */
5
6#include "linebreak_kernel.h"
7#include <re/re_cc.h>
8#include <re/re_toolchain.h>
9#include <pablo/pe_ones.h>          // for Ones
10#include <pablo/pe_var.h>           // for Var
11#include <pablo/pe_zeroes.h>        // for Zeroes
12#include <cc/cc_compiler.h>
13#include <pablo/builder.hpp>
14#include <IR_Gen/idisa_builder.h>
15#include <kernels/kernel_builder.h>
16
17#include <llvm/Support/raw_ostream.h>
18
19using namespace cc;
20using namespace kernel;
21using namespace pablo;
22using namespace re;
23using namespace llvm;
24using namespace IDISA;
25
26LineFeedKernelBuilder::LineFeedKernelBuilder(const std::unique_ptr<kernel::KernelBuilder> & b, StreamSet * BasisBits, StreamSet * LineFeedStream, cc::BitNumbering basisNumbering)
27: PabloKernel(b, "lf" + std::to_string(BasisBits->getNumElements()) + "x" + std::to_string(BasisBits->getFieldWidth()),
28// input
29{Binding{"basis", BasisBits}},
30{Binding{"lf", LineFeedStream}}),
31    mNumOfStreams(BasisBits->getNumElements()),
32    mStreamFieldWidth(BasisBits->getFieldWidth()),
33    mBasisSetNumbering(basisNumbering)
34{
35}
36
37void LineFeedKernelBuilder::generatePabloMethod() {
38    PabloBuilder pb(getEntryScope());
39    std::unique_ptr<CC_Compiler> ccc;
40    if (mNumOfStreams == 1) {
41        ccc = make_unique<cc::Direct_CC_Compiler>(getEntryScope(), pb.createExtract(getInput(0), pb.getInteger(0)));
42    } else {
43        ccc = make_unique<cc::Parabix_CC_Compiler>(getEntryScope(), getInputStreamSet("basis"), mBasisSetNumbering);
44    }
45    PabloAST * LF = ccc->compileCC("LF", makeByte(0x0A), pb);
46    pb.createAssign(pb.createExtract(getOutput(0), 0), LF);
47}
48
49LineBreakKernelBuilder::LineBreakKernelBuilder(const std::unique_ptr<kernel::KernelBuilder> & b, const unsigned basisBitsCount)
50: PabloKernel(b, "lb" + std::to_string(basisBitsCount),
51// inputs
52{Binding{b->getStreamSetTy(basisBitsCount), "basis", FixedRate(), Principal()}
53,Binding{b->getStreamSetTy(1), "lf", FixedRate(), LookAhead(1)}},
54// outputs
55{Binding{b->getStreamSetTy(1), "linebreak", FixedRate()}
56,Binding{b->getStreamSetTy(1), "cr+lf"}}) {
57
58}
59
60void LineBreakKernelBuilder::generatePabloMethod() {
61    PabloBuilder pb(getEntryScope());
62    cc::Parabix_CC_Compiler ccc(getEntryScope(), getInputStreamSet("basis"));
63
64    Integer * const ZERO = pb.getInteger(0);
65
66    PabloAST * const LF = pb.createExtract(getInput(1), ZERO, "LF");
67    PabloAST * const CR = ccc.compileCC(makeByte(0x0D));
68    PabloAST * const LF_VT_FF_CR = ccc.compileCC("LF,VT,FF,CR", makeByte(0x0A, 0x0D), pb);
69    Var * const LineBreak = pb.createVar("LineBreak", LF_VT_FF_CR);
70
71    // Remove the CR of any CR+LF
72    Var * const CRLF = pb.createVar("CRLF", pb.createZeroes());
73    auto crb = pb.createScope();
74    pb.createIf(CR, crb);
75    PabloAST * const lookaheadLF = crb.createLookahead(LF, 1, "lookaheadLF");
76    PabloAST * const crlf = crb.createAnd(CR, lookaheadLF);
77    crb.createAssign(CRLF, crlf);
78    PabloAST * removedCRLF = crb.createAnd(LineBreak, crb.createNot(CRLF));
79    crb.createAssign(LineBreak, removedCRLF);
80
81
82    // Record the CR marker of any CR+LF
83    pb.createAssign(pb.createExtract(getOutput(1), ZERO), CRLF);
84
85    // Check for Unicode Line Breaks
86    PabloAST * u8pfx = ccc.compileCC(makeByte(0xC0, 0xFF));
87    auto it = pb.createScope();
88    pb.createIf(u8pfx, it);
89    PabloAST * u8pfx2 = ccc.compileCC(makeByte(0xC2, 0xDF), it);
90    PabloAST * u8pfx3 = ccc.compileCC(makeByte(0xE0, 0xEF), it);
91
92    // Two-byte sequences
93    auto it2 = it.createScope();
94    it.createIf(u8pfx2, it2);
95    PabloAST * NEL = it2.createAnd(it2.createAdvance(ccc.compileCC(makeByte(0xC2), it2), 1), ccc.compileCC(makeByte(0x85), it2), "NEL");
96    it2.createAssign(LineBreak, it2.createOr(LineBreak, NEL));
97
98    // Three-byte sequences
99    auto it3 = it.createScope();
100    it.createIf(u8pfx3, it3);
101    PabloAST * E2_80 = it3.createAnd(it3.createAdvance(ccc.compileCC(makeByte(0xE2), it3), 1), ccc.compileCC(makeByte(0x80), it3));
102    PabloAST * LS_PS = it3.createAnd(it3.createAdvance(E2_80, 1), ccc.compileCC(makeByte(0xA8,0xA9), it3), "LS_PS");
103    it3.createAssign(LineBreak, it3.createOr(LineBreak, LS_PS));
104
105    PabloAST * unterminatedLineAtEOF = pb.createAtEOF(pb.createAdvance(pb.createNot(LineBreak), 1), "unterminatedLineAtEOF");
106    pb.createAssign(pb.createExtract(getOutput(0), ZERO), pb.createOr(LineBreak, unterminatedLineAtEOF, "EOL"));
107}
Note: See TracBrowser for help on using the repository browser.