source: icGREP/icgrep-devel/icgrep/kernels/linebreak_kernel.cpp @ 5413

Last change on this file since 5413 was 5401, checked in by nmedfort, 2 years ago

Updated all projects to use ParabixDriver. Deprecated original pipeline generation methods. Enabled LLVM optimizations, IR and ASM printing for Kernel modules. Enabled object cache by default. Began work on moving consumed position information back to producing kernels.

File size: 3.3 KB
Line 
1/*
2 *  Copyright (c) 2016 International Characters.
3 *  This software is licensed to the public under the Open Software License 3.0.
4 */
5
6#include "linebreak_kernel.h"
7#include <re/re_cc.h>
8#include <re/re_toolchain.h>
9#include <pablo/pe_ones.h>          // for Ones
10#include <pablo/pe_var.h>           // for Var
11#include <pablo/pe_zeroes.h>        // for Zeroes
12#include <cc/cc_compiler.h>
13#include <pablo/builder.hpp>
14#include <llvm/IR/Module.h>
15
16#include <llvm/Support/raw_ostream.h>
17
18using namespace cc;
19using namespace kernel;
20using namespace pablo;
21using namespace re;
22using namespace llvm;
23
// Evaluates to true unless the DisableUnicodeLineBreak algorithm option is set.
// The LineBreakKernelBuilder constructor uses it to choose between the Unicode
// line-break stream and the plain LF stream.
// NOTE(review): AlgorithmOptionIsSet / DisableUnicodeLineBreak are presumably
// declared in <re/re_toolchain.h> -- confirm.
24#define UNICODE_LINE_BREAK (!AlgorithmOptionIsSet(DisableUnicodeLineBreak))
25
26LineBreakKernelBuilder::LineBreakKernelBuilder (
27IDISA::IDISA_Builder * iBuilder
28, unsigned basisBitsCount)
29: PabloKernel(iBuilder, "Parabix:lb_kernel", {Binding{iBuilder->getStreamSetTy(basisBitsCount), "basis"}}, {Binding{iBuilder->getStreamSetTy(1), "linebreak", Add1()}}) {
30
31    CC_Compiler ccc(this, getInput(0));
32    auto & builder = ccc.getBuilder();
33   
34    PabloAST * LineBreak = nullptr;
35    PabloAST * LF = ccc.compileCC("LF", makeCC(0x0A), builder);
36    PabloAST * CR = ccc.compileCC(makeCC(0x0D));
37    PabloAST * LF_VT_FF_CR = ccc.compileCC(makeCC(0x0A, 0x0D));
38
39    Zeroes * const zero = builder.createZeroes();
40    Var * crlf = builder.createVar("crlf", zero);
41    PabloBuilder crb = PabloBuilder::Create(builder);
42#ifndef USE_LOOKAHEAD_CRLF
43    PabloAST * cr1 = crb.createAdvance(CR, 1, "cr1");
44    crb.createAssign(crlf, crb.createAnd(cr1, LF));
45#else
46    PabloAST * lookaheadLF = crb.createLookahead(LF, 1, "lookaheadLF");
47    crb.createAssign(crlf, crb.createAnd(CR, lookaheadLF));
48#endif
49    builder.createIf(CR, crb);
50   
51    Var * NEL_LS_PS = builder.createVar("NEL_LS_PS", zero);
52
53    PabloAST * u8pfx = ccc.compileCC(makeCC(0xC0, 0xFF));
54    PabloBuilder it = PabloBuilder::Create(builder);
55    builder.createIf(u8pfx, it);
56    PabloAST * u8pfx2 = ccc.compileCC(makeCC(0xC2, 0xDF), it);
57    PabloAST * u8pfx3 = ccc.compileCC(makeCC(0xE0, 0xEF), it);
58
59    //
60    // Two-byte sequences
61    Var * NEL = it.createVar("NEL", zero);
62    PabloBuilder it2 = PabloBuilder::Create(it);
63    it2.createAssign(NEL, it2.createAnd(it2.createAdvance(ccc.compileCC(makeCC(0xC2), it2), 1), ccc.compileCC(makeCC(0x85), it2)));
64    it.createIf(u8pfx2, it2);
65
66    //
67    // Three-byte sequences
68
69    Var * LS_PS = it.createVar("LS_PS", zero);
70    PabloBuilder it3 = PabloBuilder::Create(it);
71    it.createIf(u8pfx3, it3);
72    PabloAST * E2_80 = it3.createAnd(it3.createAdvance(ccc.compileCC(makeCC(0xE2), it3), 1), ccc.compileCC(makeCC(0x80), it3));
73    it3.createAssign(LS_PS, it3.createAnd(it3.createAdvance(E2_80, 1), ccc.compileCC(makeCC(0xA8,0xA9), it3)));
74    it.createAssign(NEL_LS_PS, it.createOr(NEL, LS_PS));
75
76
77    PabloAST * LB_chars = builder.createOr(LF_VT_FF_CR, NEL_LS_PS);
78    PabloAST * UnicodeLineBreak = builder.createAnd(LB_chars, builder.createNot(crlf));  // count the CR, but not CRLF
79
80    PabloAST * lb = UNICODE_LINE_BREAK ? UnicodeLineBreak : LF;
81    PabloAST * unterminatedLineAtEOF = builder.createAtEOF(builder.createAdvance(builder.createNot(LB_chars), 1));
82    LineBreak = builder.createOr(lb, unterminatedLineAtEOF);
83    PabloAST * const r = builder.createExtract(getOutput(0), builder.getInteger(0));
84    builder.createAssign(r, LineBreak);
85#ifdef USE_LOOKAHEAD_CRLF
86    setLookAhead(1);
87#endif
88}
Note: See TracBrowser for help on using the repository browser.