source: icGREP/icgrep-devel/icgrep/kernels/linebreak_kernel.cpp @ 5915

Last change on this file since 5915 was 5915, checked in by cameron, 13 months ago

ORCJIT for LLVM 3.9/4.0 initial check-in

File size: 4.3 KB
Line 
/*
 *  Copyright (c) 2018 International Characters.
 *  This software is licensed to the public under the Open Software License 3.0.
 */
5
6#include "linebreak_kernel.h"
7#include <re/re_cc.h>
8#include <re/re_toolchain.h>
9#include <pablo/pe_ones.h>          // for Ones
10#include <pablo/pe_var.h>           // for Var
11#include <pablo/pe_zeroes.h>        // for Zeroes
12#include <cc/cc_compiler.h>
13#include <pablo/builder.hpp>
14#include <IR_Gen/idisa_builder.h>
15#include <kernels/kernel_builder.h>
16
17#include <llvm/Support/raw_ostream.h>
18
19using namespace cc;
20using namespace kernel;
21using namespace pablo;
22using namespace re;
23using namespace llvm;
24using namespace IDISA;
25
26LineFeedKernelBuilder::LineFeedKernelBuilder(const std::unique_ptr<kernel::KernelBuilder> & b, Binding && inputStreamSet)
27: PabloKernel(b, "lf" + std::to_string(getNumOfStreams(inputStreamSet.getType())) + "x" + std::to_string(getStreamFieldWidth(inputStreamSet.getType())),
28// input
29{inputStreamSet},
30{Binding{b->getStreamSetTy(1), "lf"}}),
31    mNumOfStreams(getNumOfStreams(inputStreamSet.getType())),
32    mStreamFieldWidth(getStreamFieldWidth(inputStreamSet.getType()))
33{
34}
35
36void LineFeedKernelBuilder::generatePabloMethod() {
37    PabloBuilder pb(getEntryScope());
38    std::unique_ptr<CC_Compiler> ccc;
39    if (mNumOfStreams == 1) {
40        ccc = make_unique<cc::Direct_CC_Compiler>(getEntryScope(), pb.createExtract(getInput(0), pb.getInteger(0)));
41    } else {
42        ccc = make_unique<cc::Parabix_CC_Compiler>(getEntryScope(), getInputStreamSet("basis"));
43    }
44    PabloAST * LF = ccc->compileCC("LF", makeByte(0x0A), pb);
45    pb.createAssign(pb.createExtract(getOutput(0), 0), LF);
46}
47
48LineBreakKernelBuilder::LineBreakKernelBuilder(const std::unique_ptr<kernel::KernelBuilder> & b, const unsigned basisBitsCount)
49: PabloKernel(b, "lb" + std::to_string(basisBitsCount),
50// inputs
51{Binding{b->getStreamSetTy(basisBitsCount), "basis", FixedRate(), Principal()}
52,Binding{b->getStreamSetTy(1), "lf", FixedRate(), LookAhead(1)}},
53// outputs
54{Binding{b->getStreamSetTy(1), "linebreak", FixedRate()}
55,Binding{b->getStreamSetTy(1), "cr+lf"}}) {
56
57}
58
59void LineBreakKernelBuilder::generatePabloMethod() {
60    PabloBuilder pb(getEntryScope());
61    cc::Parabix_CC_Compiler ccc(getEntryScope(), getInputStreamSet("basis"));
62
63    Integer * const ZERO = pb.getInteger(0);
64
65    PabloAST * const LF = pb.createExtract(getInput(1), ZERO, "LF");
66    PabloAST * const CR = ccc.compileCC(makeByte(0x0D));
67    PabloAST * const LF_VT_FF_CR = ccc.compileCC("LF,VT,FF,CR", makeByte(0x0A, 0x0D), pb);
68    Var * const LineBreak = pb.createVar("LineBreak", LF_VT_FF_CR);
69
70    // Remove the CR of any CR+LF
71    Var * const CRLF = pb.createVar("CRLF", pb.createZeroes());
72    auto crb = pb.createScope();
73    pb.createIf(CR, crb);
74    PabloAST * const lookaheadLF = crb.createLookahead(LF, 1, "lookaheadLF");
75    PabloAST * const crlf = crb.createAnd(CR, lookaheadLF);
76    crb.createAssign(CRLF, crlf);
77    PabloAST * removedCRLF = crb.createAnd(LineBreak, crb.createNot(CRLF));
78    crb.createAssign(LineBreak, removedCRLF);
79
80
81    // Record the CR marker of any CR+LF
82    pb.createAssign(pb.createExtract(getOutput(1), ZERO), CRLF);
83
84    // Check for Unicode Line Breaks
85    PabloAST * u8pfx = ccc.compileCC(makeByte(0xC0, 0xFF));
86    auto it = pb.createScope();
87    pb.createIf(u8pfx, it);
88    PabloAST * u8pfx2 = ccc.compileCC(makeByte(0xC2, 0xDF), it);
89    PabloAST * u8pfx3 = ccc.compileCC(makeByte(0xE0, 0xEF), it);
90
91    // Two-byte sequences
92    auto it2 = it.createScope();
93    it.createIf(u8pfx2, it2);
94    PabloAST * NEL = it2.createAnd(it2.createAdvance(ccc.compileCC(makeByte(0xC2), it2), 1), ccc.compileCC(makeByte(0x85), it2), "NEL");
95    it2.createAssign(LineBreak, it2.createOr(LineBreak, NEL));
96
97    // Three-byte sequences
98    auto it3 = it.createScope();
99    it.createIf(u8pfx3, it3);
100    PabloAST * E2_80 = it3.createAnd(it3.createAdvance(ccc.compileCC(makeByte(0xE2), it3), 1), ccc.compileCC(makeByte(0x80), it3));
101    PabloAST * LS_PS = it3.createAnd(it3.createAdvance(E2_80, 1), ccc.compileCC(makeByte(0xA8,0xA9), it3), "LS_PS");
102    it3.createAssign(LineBreak, it3.createOr(LineBreak, LS_PS));
103
104    PabloAST * unterminatedLineAtEOF = pb.createAtEOF(pb.createAdvance(pb.createNot(LineBreak), 1), "unterminatedLineAtEOF");
105    pb.createAssign(pb.createExtract(getOutput(0), ZERO), pb.createOr(LineBreak, unterminatedLineAtEOF, "EOL"));
106}
Note: See TracBrowser for help on using the repository browser.