source: icGREP/icgrep-devel/icgrep/kernels/linebreak_kernel.cpp @ 6119

Last change on this file since 6119 was 6119, checked in by xwa163, 13 months ago
  1. Add a BasisSetNumbering option to fix a multiplexing bug
  2. Use BigEndian BitNumbering for the lz4 and lzparabix related pipelines
  3. Support multiplexing in LZ4BitStreamAio pipeline
File size: 4.4 KB
Line 
1/*
2 *  Copyright (c) 2018 International Characters.
3 *  This software is licensed to the public under the Open Software License 3.0.
4 */
5
6#include "linebreak_kernel.h"
7#include <re/re_cc.h>
8#include <re/re_toolchain.h>
9#include <pablo/pe_ones.h>          // for Ones
10#include <pablo/pe_var.h>           // for Var
11#include <pablo/pe_zeroes.h>        // for Zeroes
12#include <cc/cc_compiler.h>
13#include <pablo/builder.hpp>
14#include <IR_Gen/idisa_builder.h>
15#include <kernels/kernel_builder.h>
16
17#include <llvm/Support/raw_ostream.h>
18
19using namespace cc;
20using namespace kernel;
21using namespace pablo;
22using namespace re;
23using namespace llvm;
24using namespace IDISA;
25
26LineFeedKernelBuilder::LineFeedKernelBuilder(const std::unique_ptr<kernel::KernelBuilder> & b, Binding && inputStreamSet, cc::BitNumbering basisNumbering)
27: PabloKernel(b, "lf" + std::to_string(getNumOfStreams(inputStreamSet.getType())) + "x" + std::to_string(getStreamFieldWidth(inputStreamSet.getType())),
28// input
29{inputStreamSet},
30{Binding{b->getStreamSetTy(1), "lf"}}),
31    mNumOfStreams(getNumOfStreams(inputStreamSet.getType())),
32    mStreamFieldWidth(getStreamFieldWidth(inputStreamSet.getType())),
33    mBasisSetNumbering(basisNumbering)
34{
35}
36
37void LineFeedKernelBuilder::generatePabloMethod() {
38    PabloBuilder pb(getEntryScope());
39    std::unique_ptr<CC_Compiler> ccc;
40    if (mNumOfStreams == 1) {
41        ccc = make_unique<cc::Direct_CC_Compiler>(getEntryScope(), pb.createExtract(getInput(0), pb.getInteger(0)));
42    } else {
43        ccc = make_unique<cc::Parabix_CC_Compiler>(getEntryScope(), getInputStreamSet("basis"), mBasisSetNumbering);
44    }
45    PabloAST * LF = ccc->compileCC("LF", makeByte(0x0A), pb);
46    pb.createAssign(pb.createExtract(getOutput(0), 0), LF);
47}
48
49LineBreakKernelBuilder::LineBreakKernelBuilder(const std::unique_ptr<kernel::KernelBuilder> & b, const unsigned basisBitsCount)
50: PabloKernel(b, "lb" + std::to_string(basisBitsCount),
51// inputs
52{Binding{b->getStreamSetTy(basisBitsCount), "basis", FixedRate(), Principal()}
53,Binding{b->getStreamSetTy(1), "lf", FixedRate(), LookAhead(1)}},
54// outputs
55{Binding{b->getStreamSetTy(1), "linebreak", FixedRate()}
56,Binding{b->getStreamSetTy(1), "cr+lf"}}) {
57
58}
59
60void LineBreakKernelBuilder::generatePabloMethod() {
61    PabloBuilder pb(getEntryScope());
62    cc::Parabix_CC_Compiler ccc(getEntryScope(), getInputStreamSet("basis"));
63
64    Integer * const ZERO = pb.getInteger(0);
65
66    PabloAST * const LF = pb.createExtract(getInput(1), ZERO, "LF");
67    PabloAST * const CR = ccc.compileCC(makeByte(0x0D));
68    PabloAST * const LF_VT_FF_CR = ccc.compileCC("LF,VT,FF,CR", makeByte(0x0A, 0x0D), pb);
69    Var * const LineBreak = pb.createVar("LineBreak", LF_VT_FF_CR);
70
71    // Remove the CR of any CR+LF
72    Var * const CRLF = pb.createVar("CRLF", pb.createZeroes());
73    auto crb = pb.createScope();
74    pb.createIf(CR, crb);
75    PabloAST * const lookaheadLF = crb.createLookahead(LF, 1, "lookaheadLF");
76    PabloAST * const crlf = crb.createAnd(CR, lookaheadLF);
77    crb.createAssign(CRLF, crlf);
78    PabloAST * removedCRLF = crb.createAnd(LineBreak, crb.createNot(CRLF));
79    crb.createAssign(LineBreak, removedCRLF);
80
81
82    // Record the CR marker of any CR+LF
83    pb.createAssign(pb.createExtract(getOutput(1), ZERO), CRLF);
84
85    // Check for Unicode Line Breaks
86    PabloAST * u8pfx = ccc.compileCC(makeByte(0xC0, 0xFF));
87    auto it = pb.createScope();
88    pb.createIf(u8pfx, it);
89    PabloAST * u8pfx2 = ccc.compileCC(makeByte(0xC2, 0xDF), it);
90    PabloAST * u8pfx3 = ccc.compileCC(makeByte(0xE0, 0xEF), it);
91
92    // Two-byte sequences
93    auto it2 = it.createScope();
94    it.createIf(u8pfx2, it2);
95    PabloAST * NEL = it2.createAnd(it2.createAdvance(ccc.compileCC(makeByte(0xC2), it2), 1), ccc.compileCC(makeByte(0x85), it2), "NEL");
96    it2.createAssign(LineBreak, it2.createOr(LineBreak, NEL));
97
98    // Three-byte sequences
99    auto it3 = it.createScope();
100    it.createIf(u8pfx3, it3);
101    PabloAST * E2_80 = it3.createAnd(it3.createAdvance(ccc.compileCC(makeByte(0xE2), it3), 1), ccc.compileCC(makeByte(0x80), it3));
102    PabloAST * LS_PS = it3.createAnd(it3.createAdvance(E2_80, 1), ccc.compileCC(makeByte(0xA8,0xA9), it3), "LS_PS");
103    it3.createAssign(LineBreak, it3.createOr(LineBreak, LS_PS));
104
105    PabloAST * unterminatedLineAtEOF = pb.createAtEOF(pb.createAdvance(pb.createNot(LineBreak), 1), "unterminatedLineAtEOF");
106    pb.createAssign(pb.createExtract(getOutput(0), ZERO), pb.createOr(LineBreak, unterminatedLineAtEOF, "EOL"));
107}
Note: See TracBrowser for help on using the repository browser.