source: icGREP/icgrep-devel/icgrep/kernels/linebreak_kernel.cpp @ 5377

Last change on this file since 5377 was 5370, checked in by xuedongx, 2 years ago

Add1 processing rate; pablo Count only up to EOFbit

File size: 3.1 KB
Line 
1/*
2 *  Copyright (c) 2016 International Characters.
3 *  This software is licensed to the public under the Open Software License 3.0.
4 */
5
6#include "linebreak_kernel.h"
7#include <re/re_cc.h>
8#include <re/re_toolchain.h>
9#include <pablo/pe_ones.h>          // for Ones
10#include <pablo/pe_var.h>           // for Var
11#include <pablo/pe_zeroes.h>        // for Zeroes
12#include <cc/cc_compiler.h>
13#include <pablo/builder.hpp>
14#include <llvm/IR/Module.h>
15
16#include <llvm/Support/raw_ostream.h>
17
18using namespace cc;
19using namespace kernel;
20using namespace pablo;
21using namespace re;
22using namespace llvm;
23
// Expands to true unless the DisableUnicodeLineBreak algorithm option is set; the kernel
// constructor below uses it to choose between Unicode line breaks and plain LF.
// NOTE(review): AlgorithmOptionIsSet and DisableUnicodeLineBreak are not declared in this
// file -- presumably they come from re/re_toolchain.h; confirm.
24#define UNICODE_LINE_BREAK (!AlgorithmOptionIsSet(DisableUnicodeLineBreak))
25
26LineBreakKernelBuilder::LineBreakKernelBuilder (
27IDISA::IDISA_Builder * iBuilder
28, std::string linebreak
29, unsigned basisBitsCount)
30: PabloKernel(iBuilder, linebreak +"_kernel", {Binding{iBuilder->getStreamSetTy(basisBitsCount), "basis"}}, {Binding{iBuilder->getStreamSetTy(1), "linebreak", Add1()}}) {
31
32    CC_Compiler ccc(this, getInput(0));
33    auto & builder = ccc.getBuilder();
34   
35    PabloAST * LineBreak = nullptr;
36    PabloAST * LF = ccc.compileCC("LF", makeCC(0x0A), builder);
37    PabloAST * CR = ccc.compileCC(makeCC(0x0D));
38    PabloAST * LF_VT_FF_CR = ccc.compileCC(makeCC(0x0A, 0x0D));
39
40    Zeroes * const zero = builder.createZeroes();
41    Var * crlf = builder.createVar("crlf", zero);
42    PabloBuilder crb = PabloBuilder::Create(builder);
43    PabloAST * cr1 = crb.createAdvance(CR, 1, "cr1");
44    crb.createAssign(crlf, crb.createAnd(cr1, LF));
45    builder.createIf(CR, crb);
46   
47    Var * NEL_LS_PS = builder.createVar("NEL_LS_PS", zero);
48
49    PabloAST * u8pfx = ccc.compileCC(makeCC(0xC0, 0xFF));
50    PabloBuilder it = PabloBuilder::Create(builder);
51    builder.createIf(u8pfx, it);
52    PabloAST * u8pfx2 = ccc.compileCC(makeCC(0xC2, 0xDF), it);
53    PabloAST * u8pfx3 = ccc.compileCC(makeCC(0xE0, 0xEF), it);
54
55    //
56    // Two-byte sequences
57    Var * NEL = it.createVar("NEL", zero);
58    PabloBuilder it2 = PabloBuilder::Create(it);
59    it2.createAssign(NEL, it2.createAnd(it2.createAdvance(ccc.compileCC(makeCC(0xC2), it2), 1), ccc.compileCC(makeCC(0x85), it2)));
60    it.createIf(u8pfx2, it2);
61
62    //
63    // Three-byte sequences
64
65    Var * LS_PS = it.createVar("LS_PS", zero);
66    PabloBuilder it3 = PabloBuilder::Create(it);
67    it.createIf(u8pfx3, it3);
68    PabloAST * E2_80 = it3.createAnd(it3.createAdvance(ccc.compileCC(makeCC(0xE2), it3), 1), ccc.compileCC(makeCC(0x80), it3));
69    it3.createAssign(LS_PS, it3.createAnd(it3.createAdvance(E2_80, 1), ccc.compileCC(makeCC(0xA8,0xA9), it3)));
70    it.createAssign(NEL_LS_PS, it.createOr(NEL, LS_PS));
71
72
73    PabloAST * LB_chars = builder.createOr(LF_VT_FF_CR, NEL_LS_PS);
74    PabloAST * UnicodeLineBreak = builder.createAnd(LB_chars, builder.createNot(crlf));  // count the CR, but not CRLF
75   
76    PabloAST * lb = UNICODE_LINE_BREAK ? UnicodeLineBreak : LF;
77    PabloAST * unterminatedLineAtEOF = builder.createAtEOF(builder.createAdvance(builder.createNot(LB_chars), 1));
78    LineBreak = builder.createOr(lb, unterminatedLineAtEOF);
79    PabloAST * const r = builder.createExtract(getOutput(0), builder.getInteger(0));
80    builder.createAssign(r, LineBreak);
81}
Note: See TracBrowser for help on using the repository browser.