source: icGREP/icgrep-devel/icgrep/kernels/grep_kernel.cpp @ 5772

Last change on this file since 5772 was 5769, checked in by cameron, 19 months ago

Decoupling case-insensitive transform from parser

File size: 11.2 KB
RevLine 
[5404]1/*
2 *  Copyright (c) 2017 International Characters.
3 *  This software is licensed to the public under the Open Software License 3.0.
4 */
5
6#include "grep_kernel.h"
7#include <boost/uuid/sha1.hpp>
8#include <re/printer_re.h>
9#include <re/re_toolchain.h>
10#include <pablo/pablo_toolchain.h>
[5436]11#include <kernels/kernel_builder.h>
12#include <pablo/builder.hpp>
[5561]13#include <pablo/pe_ones.h>          // for Ones
14#include <pablo/pe_var.h>           // for Var
15#include <pablo/pe_zeroes.h>        // for Zeroes
[5548]16#include <pablo/boolean.h>
[5413]17#include <pablo/pe_count.h>
[5548]18#include <pablo/pe_matchstar.h>
[5561]19#include "cc/cc_compiler.h"         // for CC_Compiler
[5404]20
[5464]21#include <llvm/Support/raw_ostream.h>
22
[5404]23using namespace kernel;
24using namespace pablo;
25using namespace re;
26using namespace llvm;
27
[5408]28inline static std::string sha1sum(const std::string & str) {
[5404]29    char buffer[41];    // 40 hex-digits and the terminating null
[5408]30    uint32_t digest[5]; // 160 bits in total
[5404]31    boost::uuids::detail::sha1 sha1;
32    sha1.process_bytes(str.c_str(), str.size());
33    sha1.get_digest(digest);
34    snprintf(buffer, sizeof(buffer), "%.8x%.8x%.8x%.8x%.8x",
35             digest[0], digest[1], digest[2], digest[3], digest[4]);
36    return std::string(buffer);
37}
38
[5561]39void RequiredStreams_UTF8::generatePabloMethod() {
40   
41    cc::CC_Compiler ccc(this, getInput(0));
42    auto & pb = ccc.getBuilder();
43    Zeroes * const zero = pb.createZeroes();
44    PabloAST * LF = ccc.compileCC("LF", makeCC(0x0A), pb);
45    PabloAST * CR = ccc.compileCC(makeCC(0x0D));
46
47    Var * crlf = pb.createVar("crlf", zero);
48    PabloBuilder crb = PabloBuilder::Create(pb);
49    PabloAST * cr1 = crb.createAdvance(CR, 1, "cr1");
50    crb.createAssign(crlf, crb.createAnd(cr1, LF));
51    pb.createIf(CR, crb);
52   
53    Var * u8invalid = pb.createVar("u8invalid", zero);
54    Var * valid_pfx = pb.createVar("valid_pfx", zero);
55    Var * nonFinal = pb.createVar("nonfinal", zero);
56    PabloAST * u8pfx = ccc.compileCC(makeCC(0xC0, 0xFF));
57   
58    PabloBuilder it = PabloBuilder::Create(pb);
59
60    pb.createIf(u8pfx, it);
61    PabloAST * u8pfx2 = ccc.compileCC(makeCC(0xC2, 0xDF), it);
62    PabloAST * u8pfx3 = ccc.compileCC(makeCC(0xE0, 0xEF), it);
63    PabloAST * u8pfx4 = ccc.compileCC(makeCC(0xF0, 0xF4), it);
64    PabloAST * u8suffix = ccc.compileCC("u8suffix", makeCC(0x80, 0xBF), it);
65   
66    //
67    // Two-byte sequences
68    Var * u8scope22 = it.createVar("u8scope22", zero);
69    PabloBuilder it2 = PabloBuilder::Create(it);
70    it2.createAssign(u8scope22, it2.createAdvance(u8pfx2, 1));
71    it.createIf(u8pfx2, it2);
72    //
73    // Three-byte sequences
74   
75    Var * u8scope32 = it.createVar("u8scope32", zero);
76    Var * u8scope3X = it.createVar("u8scope3X", zero);
77    Var * EX_invalid = it.createVar("EX_invalid", zero);
78    PabloBuilder it3 = PabloBuilder::Create(it);
79    it.createIf(u8pfx3, it3);
80    it3.createAssign(u8scope32, it3.createAdvance(u8pfx3, 1));
81    PabloAST * u8scope33 = it3.createAdvance(u8pfx3, 2);
82    it3.createAssign(u8scope3X, it3.createOr(u8scope32, u8scope33));
83    PabloAST * E0_invalid = it3.createAnd(it3.createAdvance(ccc.compileCC(makeCC(0xE0), it3), 1), ccc.compileCC(makeCC(0x80, 0x9F), it3));
84    PabloAST * ED_invalid = it3.createAnd(it3.createAdvance(ccc.compileCC(makeCC(0xED), it3), 1), ccc.compileCC(makeCC(0xA0, 0xBF), it3));
85    it3.createAssign(EX_invalid, it3.createOr(E0_invalid, ED_invalid));
86   
87    //
88    // Four-byte sequences
89    Var * u8scope4nonfinal = it.createVar("u8scope4nonfinal", zero);
90    Var * u8scope4X = it.createVar("u8scope4X", zero);
91    Var * FX_invalid = it.createVar("FX_invalid", zero);
92    PabloBuilder it4 = PabloBuilder::Create(it);
93    it.createIf(u8pfx4, it4);
94    PabloAST * u8scope42 = it4.createAdvance(u8pfx4, 1, "u8scope42");
95    PabloAST * u8scope43 = it4.createAdvance(u8scope42, 1, "u8scope43");
96    PabloAST * u8scope44 = it4.createAdvance(u8scope43, 1, "u8scope44");
97    it4.createAssign(u8scope4nonfinal, it4.createOr(u8scope42, u8scope43));
98    it4.createAssign(u8scope4X, it4.createOr(u8scope4nonfinal, u8scope44));
99    PabloAST * F0_invalid = it4.createAnd(it4.createAdvance(ccc.compileCC(makeCC(0xF0), it4), 1), ccc.compileCC(makeCC(0x80, 0x8F), it4));
100    PabloAST * F4_invalid = it4.createAnd(it4.createAdvance(ccc.compileCC(makeCC(0xF4), it4), 1), ccc.compileCC(makeCC(0x90, 0xBF), it4));
101    it4.createAssign(FX_invalid, it4.createOr(F0_invalid, F4_invalid));
102   
103    //
104    // Invalid cases
105    PabloAST * anyscope = it.createOr(u8scope22, it.createOr(u8scope3X, u8scope4X));
106    PabloAST * legalpfx = it.createOr(it.createOr(u8pfx2, u8pfx3), u8pfx4);
107    //  Any scope that does not have a suffix byte, and any suffix byte that is not in
108    //  a scope is a mismatch, i.e., invalid UTF-8.
109    PabloAST * mismatch = it.createXor(anyscope, u8suffix);
110    //
111    PabloAST * EF_invalid = it.createOr(EX_invalid, FX_invalid);
112    PabloAST * pfx_invalid = it.createXor(u8pfx, legalpfx);
113    it.createAssign(u8invalid, it.createOr(pfx_invalid, it.createOr(mismatch, EF_invalid)));
114    PabloAST * u8valid = it.createNot(u8invalid, "u8valid");
115    //
116    //
117   
118    it.createAssign(valid_pfx, it.createAnd(u8pfx, u8valid));
119    it.createAssign(nonFinal, it.createAnd(it.createOr(it.createOr(u8pfx, u8scope32), u8scope4nonfinal), u8valid));
120   
121    PabloAST * u8single = pb.createAnd(ccc.compileCC(makeCC(0x00, 0x7F)), pb.createNot(u8invalid));
122   
123    Var * const required = getOutputStreamVar("required");
124    pb.createAssign(pb.createExtract(required, pb.getInteger(0)), pb.createOr(u8single, valid_pfx, "initial"));
125    pb.createAssign(pb.createExtract(required, pb.getInteger(1)), nonFinal);
126    pb.createAssign(pb.createExtract(required, pb.getInteger(2)), pb.createNot(pb.createOr(nonFinal, u8invalid), "final"));
127    pb.createAssign(pb.createExtract(required, pb.getInteger(3)), crlf);
128}
129
130RequiredStreams_UTF8::RequiredStreams_UTF8(const std::unique_ptr<kernel::KernelBuilder> & kb)
131: PabloKernel(kb, "RequiredStreams_UTF8",               
132              {Binding{kb->getStreamSetTy(8), "basis"}}, 
[5706]133              {Binding{kb->getStreamSetTy(4), "required", FixedRate(), Add1()}},
[5561]134              {},
135              {}) {
136}
137
138void RequiredStreams_UTF16::generatePabloMethod() {
139   
140    cc::CC_Compiler ccc(this, getInput(0));
141    auto & pb = ccc.getBuilder();
142   
143    PabloAST * LF = ccc.compileCC("LF", makeCC(0x000A), pb);
144    PabloAST * CR = ccc.compileCC("CR", makeCC(0x000D), pb);
145    PabloAST * cr1 = pb.createAdvance(CR, 1, "cr1");
146   
147    PabloAST * u16hi_hi_surrogate = ccc.compileCC(makeCC(0xD800, 0xDBFF));    //u16hi_hi_surrogate = [\xD8-\xDB]
148    PabloAST * u16hi_lo_surrogate = ccc.compileCC(makeCC(0xDC00, 0xDFFF));    //u16hi_lo_surrogate = [\xDC-\xDF]
149   
150    PabloAST * invalidTemp = pb.createAdvance(u16hi_hi_surrogate, 1, "InvalidTemp");
151    PabloAST * u16invalid = pb.createXor(invalidTemp, u16hi_lo_surrogate, "u16invalid");
152    PabloAST * u16valid = pb.createNot(u16invalid, "u16valid");
153   
154    PabloAST * u16single_temp = pb.createOr(ccc.compileCC(makeCC(0x0000, 0xD7FF)), ccc.compileCC(makeCC(0xE000, 0xFFFF)));
155    PabloAST * u16single = pb.createAnd(u16single_temp, pb.createNot(u16invalid));
156
157    Var * const required = getOutputStreamVar("required");
158    pb.createAssign(pb.createExtract(required, pb.getInteger(0)), pb.createOr(u16single, u16hi_hi_surrogate, "initial"));
159    pb.createAssign(pb.createExtract(required, pb.getInteger(1)), pb.createAnd(u16hi_hi_surrogate, u16valid, "nonfinal"));
160    pb.createAssign(pb.createExtract(required, pb.getInteger(2)), pb.createNot(pb.createOr(u16hi_hi_surrogate, u16invalid), "final"));
161    pb.createAssign(pb.createExtract(required, pb.getInteger(3)), pb.createAnd(cr1, LF, "crlf"));
162}
163
164RequiredStreams_UTF16::RequiredStreams_UTF16(const std::unique_ptr<kernel::KernelBuilder> & kb)
165: PabloKernel(kb, "RequiredStreams_UTF16",               
166              {Binding{kb->getStreamSetTy(16), "basis"}}, 
[5706]167              {Binding{kb->getStreamSetTy(4), "required", FixedRate(), Add1()}},
[5561]168              {},
169              {}) {
170}
171
172
[5769]173ICGrepSignature::ICGrepSignature(re::RE * const re_ast)
174: mRE(re_ast)
175, mSignature(Printer_RE::PrintRE(mRE)) {
176   
177}
178
[5646]179ICGrepKernel::ICGrepKernel(const std::unique_ptr<kernel::KernelBuilder> & iBuilder, RE * const re, unsigned numOfCharacterClasses)
[5769]180: ICGrepSignature(re)
[5454]181, PabloKernel(iBuilder,
182              "ic" + sha1sum(mSignature),
[5646]183              {Binding{iBuilder->getStreamSetTy(numOfCharacterClasses), "basis"},
184               Binding{iBuilder->getStreamSetTy(1, 1), "linebreak"},
185               Binding{iBuilder->getStreamSetTy(4, 1), "required"}},
[5706]186              {Binding{iBuilder->getStreamSetTy(1, 1), "matches", FixedRate(), Add1()}}) {
[5646]187
[5404]188}
189
[5454]190std::string ICGrepKernel::makeSignature(const std::unique_ptr<kernel::KernelBuilder> &) {
[5404]191    return mSignature;
192}
193
[5454]194void ICGrepKernel::generatePabloMethod() {
[5646]195    PabloAST * const match_post = re2pablo_compiler(this, mRE);
196    PabloBlock * const pb = getEntryBlock();
197    Var * const output = getOutputStreamVar("matches");
198    pb->createAssign(pb->createExtract(output, pb->getInteger(0)), match_post);
[5404]199}
[5413]200
[5548]201void MatchedLinesKernel::generatePabloMethod() {
202    auto pb = this->getEntryBlock();
203    PabloAST * matchResults = pb->createExtract(getInputStreamVar("matchResults"), pb->getInteger(0));
204    PabloAST * lineBreaks = pb->createExtract(getInputStreamVar("lineBreaks"), pb->getInteger(0));
205    PabloAST * notLB = pb->createNot(lineBreaks);
206    PabloAST * match_follow = pb->createMatchStar(matchResults, notLB);
207    Var * const matchedLines = getOutputStreamVar("matchedLines");
208    pb->createAssign(pb->createExtract(matchedLines, pb->getInteger(0)), pb->createAnd(match_follow, lineBreaks));
209}
210
211MatchedLinesKernel::MatchedLinesKernel (const std::unique_ptr<kernel::KernelBuilder> & iBuilder)
212: PabloKernel(iBuilder, "MatchedLines",
213              {Binding{iBuilder->getStreamSetTy(1), "matchResults"}, Binding{iBuilder->getStreamSetTy(1), "lineBreaks"}},
214              {Binding{iBuilder->getStreamSetTy(1), "matchedLines"}},
215              {},
216              {}) {
217}
218
219
[5440]220void InvertMatchesKernel::generateDoBlockMethod(const std::unique_ptr<KernelBuilder> & iBuilder) {
221    Value * input = iBuilder->loadInputStreamBlock("matchedLines", iBuilder->getInt32(0));
222    Value * lbs = iBuilder->loadInputStreamBlock("lineBreaks", iBuilder->getInt32(0));
[5413]223    Value * inverted = iBuilder->CreateXor(input, lbs);
[5440]224    iBuilder->storeOutputStreamBlock("nonMatches", iBuilder->getInt32(0), inverted);
[5413]225}
226
[5436]227InvertMatchesKernel::InvertMatchesKernel(const std::unique_ptr<kernel::KernelBuilder> & builder)
[5706]228: BlockOrientedKernel("Invert",
229    // Inputs
230    {Binding{builder->getStreamSetTy(1, 1), "matchedLines"}, Binding{builder->getStreamSetTy(1, 1), "lineBreaks"}},
231    // Outputs
232    {Binding{builder->getStreamSetTy(1, 1), "nonMatches"}},
233    // Input/Output Scalars and internal state
234    {}, {}, {}) {
[5435]235    setNoTerminateAttribute(true);   
[5413]236}
237
238
[5491]239void PopcountKernel::generatePabloMethod() {
240    auto pb = this->getEntryBlock();
241    const auto toCount = pb->createExtract(getInputStreamVar("toCount"), pb->getInteger(0));
242    pablo::Var * countResult = getOutputScalarVar("countResult");
243    pb->createAssign(countResult, pb->createCount(toCount));
244}
245
[5436]246PopcountKernel::PopcountKernel (const std::unique_ptr<kernel::KernelBuilder> & iBuilder)
[5413]247: PabloKernel(iBuilder, "Popcount",
248              {Binding{iBuilder->getStreamSetTy(1), "toCount"}},
249              {},
250              {},
251              {Binding{iBuilder->getSizeTy(), "countResult"}}) {
252}
Note: See TracBrowser for help on using the repository browser.