source: icGREP/icgrep-devel/icgrep/kernels/grep_kernel.cpp @ 5787

Last change on this file since 5787 was 5782, checked in by nmedfort, 22 months ago

Initial check-in of LookAhead? support; modified LineBreakKernel? to compute CR+LF using LookAhead?(1) + misc. fixes.

File size: 10.8 KB
Line 
1/*
2 *  Copyright (c) 2017 International Characters.
3 *  This software is licensed to the public under the Open Software License 3.0.
4 */
5
6#include "grep_kernel.h"
7#include <boost/uuid/sha1.hpp>
8#include <re/printer_re.h>
9#include <re/re_toolchain.h>
10#include <pablo/pablo_toolchain.h>
11#include <kernels/kernel_builder.h>
12#include <pablo/builder.hpp>
13#include <pablo/pe_ones.h>          // for Ones
14#include <pablo/pe_var.h>           // for Var
15#include <pablo/pe_zeroes.h>        // for Zeroes
16#include <pablo/boolean.h>
17#include <pablo/pe_count.h>
18#include <pablo/pe_matchstar.h>
19#include "cc/cc_compiler.h"         // for CC_Compiler
20
21#include <llvm/Support/raw_ostream.h>
22
23using namespace kernel;
24using namespace pablo;
25using namespace re;
26using namespace llvm;
27
28inline static std::string sha1sum(const std::string & str) {
29    char buffer[41];    // 40 hex-digits and the terminating null
30    uint32_t digest[5]; // 160 bits in total
31    boost::uuids::detail::sha1 sha1;
32    sha1.process_bytes(str.c_str(), str.size());
33    sha1.get_digest(digest);
34    snprintf(buffer, sizeof(buffer), "%.8x%.8x%.8x%.8x%.8x",
35             digest[0], digest[1], digest[2], digest[3], digest[4]);
36    return std::string(buffer);
37}
38
39void RequiredStreams_UTF8::generatePabloMethod() {
40   
41    cc::CC_Compiler ccc(this, getInput(0));
42    auto & pb = ccc.getBuilder();
43    Zeroes * const ZEROES = pb.createZeroes();
44    PabloAST * const u8pfx = ccc.compileCC(makeCC(0xC0, 0xFF));
45
46
47    Var * const nonFinal = pb.createVar("nonFinal", u8pfx);
48    Var * const u8invalid = pb.createVar("u8invalid", ZEROES);
49    Var * const valid_pfx = pb.createVar("valid_pfx", u8pfx);
50
51    PabloBuilder it = PabloBuilder::Create(pb);
52
53    pb.createIf(u8pfx, it);
54    PabloAST * const u8pfx2 = ccc.compileCC(makeCC(0xC2, 0xDF), it);
55    PabloAST * const u8pfx3 = ccc.compileCC(makeCC(0xE0, 0xEF), it);
56    PabloAST * const u8pfx4 = ccc.compileCC(makeCC(0xF0, 0xF4), it);
57    PabloAST * const u8suffix = ccc.compileCC("u8suffix", makeCC(0x80, 0xBF), it);
58   
59    //
60    // Two-byte sequences
61    Var * const anyscope = it.createVar("anyscope", ZEROES);
62    PabloBuilder it2 = PabloBuilder::Create(it);
63    it.createIf(u8pfx2, it2);
64    it2.createAssign(anyscope, it2.createAdvance(u8pfx2, 1));
65
66    //
67    // Three-byte sequences   
68    Var * const EF_invalid = it.createVar("EF_invalid", ZEROES);
69    PabloBuilder it3 = PabloBuilder::Create(it);
70    it.createIf(u8pfx3, it3);
71    PabloAST * const u8scope32 = it3.createAdvance(u8pfx3, 1);
72    it3.createAssign(nonFinal, it3.createOr(nonFinal, u8scope32));
73    PabloAST * const u8scope33 = it3.createAdvance(u8pfx3, 2);
74    PabloAST * const u8scope3X = it3.createOr(u8scope32, u8scope33);
75    it3.createAssign(anyscope, it3.createOr(anyscope, u8scope3X));
76    PabloAST * const E0_invalid = it3.createAnd(it3.createAdvance(ccc.compileCC(makeCC(0xE0), it3), 1), ccc.compileCC(makeCC(0x80, 0x9F), it3));
77    PabloAST * const ED_invalid = it3.createAnd(it3.createAdvance(ccc.compileCC(makeCC(0xED), it3), 1), ccc.compileCC(makeCC(0xA0, 0xBF), it3));
78    PabloAST * const EX_invalid = it3.createOr(E0_invalid, ED_invalid);
79    it3.createAssign(EF_invalid, EX_invalid);
80
81
82    //
83    // Four-byte sequences
84    PabloBuilder it4 = PabloBuilder::Create(it);
85    it.createIf(u8pfx4, it4);
86    PabloAST * const u8scope42 = it4.createAdvance(u8pfx4, 1, "u8scope42");
87    PabloAST * const u8scope43 = it4.createAdvance(u8scope42, 1, "u8scope43");
88    PabloAST * const u8scope44 = it4.createAdvance(u8scope43, 1, "u8scope44");
89    PabloAST * const u8scope4nonfinal = it4.createOr(u8scope42, u8scope43);
90    it4.createAssign(nonFinal, it4.createOr(nonFinal, u8scope4nonfinal));
91    PabloAST * const u8scope4X = it4.createOr(u8scope4nonfinal, u8scope44);
92    it4.createAssign(anyscope, it4.createOr(anyscope, u8scope4X));
93    PabloAST * const F0_invalid = it4.createAnd(it4.createAdvance(ccc.compileCC(makeCC(0xF0), it4), 1), ccc.compileCC(makeCC(0x80, 0x8F), it4));
94    PabloAST * const F4_invalid = it4.createAnd(it4.createAdvance(ccc.compileCC(makeCC(0xF4), it4), 1), ccc.compileCC(makeCC(0x90, 0xBF), it4));
95    PabloAST * const FX_invalid = it4.createOr(F0_invalid, F4_invalid);
96    it4.createAssign(EF_invalid, it4.createOr(EF_invalid, FX_invalid));
97   
98    //
99    // Invalid cases
100    PabloAST * const legalpfx = it.createOr(it.createOr(u8pfx2, u8pfx3), u8pfx4);
101    //  Any scope that does not have a suffix byte, and any suffix byte that is not in
102    //  a scope is a mismatch, i.e., invalid UTF-8.
103    PabloAST * const mismatch = it.createXor(anyscope, u8suffix);
104    //
105    PabloAST * const pfx_invalid = it.createXor(valid_pfx, legalpfx);
106    it.createAssign(u8invalid, it.createOr(pfx_invalid, it.createOr(mismatch, EF_invalid)));
107    PabloAST * const u8valid = it.createNot(u8invalid, "u8valid");
108    //
109    //
110   
111    it.createAssign(valid_pfx, it.createAnd(valid_pfx, u8valid));
112    it.createAssign(nonFinal, it.createAnd(nonFinal, u8valid));
113   
114    PabloAST * u8single = pb.createAnd(ccc.compileCC(makeCC(0x00, 0x7F)), pb.createNot(u8invalid));
115    PabloAST * const initial = pb.createOr(u8single, valid_pfx, "initial");
116    PabloAST * const final = pb.createNot(pb.createOr(nonFinal, u8invalid), "final");
117
118    Var * const required = getOutputStreamVar("required");
119    pb.createAssign(pb.createExtract(required, pb.getInteger(0)), initial);
120    pb.createAssign(pb.createExtract(required, pb.getInteger(1)), nonFinal);
121    pb.createAssign(pb.createExtract(required, pb.getInteger(2)), final);
122
123}
124
125RequiredStreams_UTF8::RequiredStreams_UTF8(const std::unique_ptr<kernel::KernelBuilder> & kb)
126: PabloKernel(kb, "RequiredStreams_UTF8",
127// input
128{Binding{kb->getStreamSetTy(8), "basis"}},
129// output
130{Binding{kb->getStreamSetTy(3), "required", FixedRate(), Add1()}}) {
131
132}
133
134void RequiredStreams_UTF16::generatePabloMethod() {
135   
136    cc::CC_Compiler ccc(this, getInput(0));
137    auto & pb = ccc.getBuilder();
138   
139    PabloAST * u16hi_hi_surrogate = ccc.compileCC(makeCC(0xD800, 0xDBFF));    //u16hi_hi_surrogate = [\xD8-\xDB]
140    PabloAST * u16hi_lo_surrogate = ccc.compileCC(makeCC(0xDC00, 0xDFFF));    //u16hi_lo_surrogate = [\xDC-\xDF]
141   
142    PabloAST * invalidTemp = pb.createAdvance(u16hi_hi_surrogate, 1, "InvalidTemp");
143    PabloAST * u16invalid = pb.createXor(invalidTemp, u16hi_lo_surrogate, "u16invalid");
144
145    PabloAST * u16valid = pb.createNot(u16invalid, "u16valid");
146    PabloAST * nonFinal = pb.createAnd(u16hi_hi_surrogate, u16valid, "nonfinal");
147
148    PabloAST * u16single_temp = pb.createOr(ccc.compileCC(makeCC(0x0000, 0xD7FF)), ccc.compileCC(makeCC(0xE000, 0xFFFF)));
149    PabloAST * u16single = pb.createAnd(u16single_temp, pb.createNot(u16invalid));
150
151    PabloAST * const nonFinalCodeUnits = pb.createExtract(getInput(1), pb.getInteger(0));
152    PabloAST * const initial = pb.createOr(u16single, u16hi_hi_surrogate, "initial");
153    PabloAST * const final = pb.createNot(pb.createOr(pb.createOr(u16hi_hi_surrogate, u16invalid), nonFinalCodeUnits), "final");
154
155    Var * const required = getOutputStreamVar("required");
156    pb.createAssign(pb.createExtract(required, pb.getInteger(0)), initial);
157    pb.createAssign(pb.createExtract(required, pb.getInteger(1)), nonFinal);
158    pb.createAssign(pb.createExtract(required, pb.getInteger(2)), final);
159
160}
161
162RequiredStreams_UTF16::RequiredStreams_UTF16(const std::unique_ptr<kernel::KernelBuilder> & kb)
163: PabloKernel(kb, "RequiredStreams_UTF16",               
164// inputs
165{Binding{kb->getStreamSetTy(8), "basis"}},
166// output
167{Binding{kb->getStreamSetTy(3), "required", FixedRate(), Add1()}}) {
168
169}
170
171ICGrepSignature::ICGrepSignature(re::RE * const re_ast)
172: mRE(re_ast)
173, mSignature(Printer_RE::PrintRE(mRE)) {
174   
175}
176
177ICGrepKernel::ICGrepKernel(const std::unique_ptr<kernel::KernelBuilder> & iBuilder, RE * const re, unsigned numOfCharacterClasses)
178: ICGrepSignature(re)
179, PabloKernel(iBuilder, "ic" + sha1sum(mSignature),
180// inputs
181{Binding{iBuilder->getStreamSetTy(numOfCharacterClasses), "basis"},
182Binding{iBuilder->getStreamSetTy(1, 1), "linebreak"},
183Binding{iBuilder->getStreamSetTy(1, 1), "cr+lf"},
184Binding{iBuilder->getStreamSetTy(3, 1), "required"}},
185// output
186{Binding{iBuilder->getStreamSetTy(1, 1), "matches", FixedRate(), Add1()}}) {
187
188}
189
190std::string ICGrepKernel::makeSignature(const std::unique_ptr<kernel::KernelBuilder> &) {
191    return mSignature;
192}
193
194void ICGrepKernel::generatePabloMethod() {
195    PabloAST * const match_post = re2pablo_compiler(this, mRE);
196    PabloBlock * const pb = getEntryBlock();
197    Var * const output = getOutputStreamVar("matches");
198    pb->createAssign(pb->createExtract(output, pb->getInteger(0)), match_post);
199}
200
201void MatchedLinesKernel::generatePabloMethod() {
202    auto pb = this->getEntryBlock();
203    PabloAST * matchResults = pb->createExtract(getInputStreamVar("matchResults"), pb->getInteger(0));
204    PabloAST * lineBreaks = pb->createExtract(getInputStreamVar("lineBreaks"), pb->getInteger(0));
205    PabloAST * notLB = pb->createNot(lineBreaks);
206    PabloAST * match_follow = pb->createMatchStar(matchResults, notLB);
207    Var * const matchedLines = getOutputStreamVar("matchedLines");
208    pb->createAssign(pb->createExtract(matchedLines, pb->getInteger(0)), pb->createAnd(match_follow, lineBreaks));
209}
210
211MatchedLinesKernel::MatchedLinesKernel (const std::unique_ptr<kernel::KernelBuilder> & iBuilder)
212: PabloKernel(iBuilder, "MatchedLines",
213// inputs
214{Binding{iBuilder->getStreamSetTy(1), "matchResults"}
215,Binding{iBuilder->getStreamSetTy(1), "lineBreaks"}},
216// output
217{Binding{iBuilder->getStreamSetTy(1), "matchedLines"}}) {
218
219}
220
221
222void InvertMatchesKernel::generateDoBlockMethod(const std::unique_ptr<KernelBuilder> & iBuilder) {
223    Value * input = iBuilder->loadInputStreamBlock("matchedLines", iBuilder->getInt32(0));
224    Value * lbs = iBuilder->loadInputStreamBlock("lineBreaks", iBuilder->getInt32(0));
225    Value * inverted = iBuilder->CreateXor(input, lbs);
226    iBuilder->storeOutputStreamBlock("nonMatches", iBuilder->getInt32(0), inverted);
227}
228
229InvertMatchesKernel::InvertMatchesKernel(const std::unique_ptr<kernel::KernelBuilder> & builder)
230: BlockOrientedKernel("Invert",
231    // Inputs
232    {Binding{builder->getStreamSetTy(1, 1), "matchedLines"}, Binding{builder->getStreamSetTy(1, 1), "lineBreaks"}},
233    // Outputs
234    {Binding{builder->getStreamSetTy(1, 1), "nonMatches"}},
235    // Input/Output Scalars and internal state
236    {}, {}, {}) {
237    setNoTerminateAttribute(true);   
238}
239
240
241void PopcountKernel::generatePabloMethod() {
242    auto pb = this->getEntryBlock();
243    const auto toCount = pb->createExtract(getInputStreamVar("toCount"), pb->getInteger(0));
244    pablo::Var * countResult = getOutputScalarVar("countResult");
245    pb->createAssign(countResult, pb->createCount(toCount));
246}
247
248PopcountKernel::PopcountKernel (const std::unique_ptr<kernel::KernelBuilder> & iBuilder)
249: PabloKernel(iBuilder, "Popcount",
250              {Binding{iBuilder->getStreamSetTy(1), "toCount"}},
251              {},
252              {},
253              {Binding{iBuilder->getSizeTy(), "countResult"}}) {
254}
Note: See TracBrowser for help on using the repository browser.