source: icGREP/icgrep-devel/icgrep/lz4/LZ4GrepGenerator.cpp @ 6047

Last change on this file since 6047 was 6047, checked in by nmedfort, 12 months ago

Major refactoring of buffer types. Static buffers replace Circular and CircularCopyback. External buffers unify Source/External.

File size: 26.2 KB
Line 
1
2#include "LZ4GrepGenerator.h"
3
4#include <boost/iostreams/device/mapped_file.hpp>
5
6#include <llvm/Support/PrettyStackTrace.h>
7
8#include <cc/cc_compiler.h>
9
10#include <kernels/cc_kernel.h>
11#include <kernels/s2p_kernel.h>
12#include <kernels/p2s_kernel.h>
13#include <kernels/source_kernel.h>
14#include <kernels/stdout_kernel.h>
15#include <kernels/lz4/lz4_generate_deposit_stream.h>
16#include <kernels/kernel_builder.h>
17#include <kernels/deletion.h>
18#include <kernels/swizzle.h>
19#include <kernels/pdep_kernel.h>
20#include <kernels/swizzled_multiple_pdep_kernel.h>
21#include <kernels/lz4/lz4_swizzled_match_copy_kernel.h>
22#include <kernels/lz4/lz4_bitstream_match_copy_kernel.h>
23#include <kernels/lz4/lz4_bitstream_not_kernel.h>
24#include <kernels/lz4/lz4_fake_stream_generating_kernel.h>
25#include <kernels/bitstream_pdep_kernel.h>
26#include <kernels/bitstream_gather_pdep_kernel.h>
27#include <re/re_toolchain.h>
28
29#include <re/collect_ccs.h>
30#include <re/replaceCC.h>
31
32#include <UCD/resolve_properties.h>
33#include <kernels/charclasses.h>
34#include <kernels/grep_kernel.h>
35#include <kernels/UCD_property_kernel.h>
36#include <kernels/grapheme_kernel.h>
37#include <kernels/linebreak_kernel.h>
38#include <kernels/streams_merge.h>
39#include <kernels/scanmatchgen.h>
40#include <kernels/until_n.h>
41#include <re/casing.h>
42#include <re/exclude_CC.h>
43#include <re/to_utf8.h>
44#include <re/re_analysis.h>
45#include <re/re_name_resolve.h>
46#include <re/re_name_gather.h>
47#include <re/re_multiplex.h>
48#include <re/re_utility.h>
49#include <re/grapheme_clusters.h>
50#include <re/printer_re.h>
51#include <llvm/Support/raw_ostream.h>
52#include <llvm/Support/Debug.h>
53
54
55
56namespace re { class CC; }
57
58using namespace llvm;
59using namespace parabix;
60using namespace kernel;
61using namespace grep;
62
63LZ4GrepGenerator::LZ4GrepGenerator(bool enableMultiplexing): LZ4Generator(), mEnableMultiplexing(enableMultiplexing) {
64    mGrepRecordBreak = grep::GrepRecordBreakKind::LF;
65    mMoveMatchesToEOL = true;
66}
67
68void LZ4GrepGenerator::initREs(std::vector<re::RE *> & REs) {
69    if (mGrepRecordBreak == GrepRecordBreakKind::Unicode) {
70        mBreakCC = re::makeCC(re::makeCC(0x0A, 0x0D), re::makeCC(re::makeCC(0x85), re::makeCC(0x2028, 0x2029)));
71    } else if (mGrepRecordBreak == GrepRecordBreakKind::Null) {
72        mBreakCC = re::makeByte(0);  // Null
73    } else {
74        mBreakCC = re::makeByte(0x0A); // LF
75    }
76    re::RE * anchorRE = mBreakCC;
77    if (mGrepRecordBreak == GrepRecordBreakKind::Unicode) {
78        re::Name * anchorName = re::makeName("UTF8_LB", re::Name::Type::Unicode);
79        anchorName->setDefinition(re::makeUnicodeBreak());
80        anchorRE = anchorName;
81    }
82
83    mREs = REs;
84    bool allAnchored = true;
85    for(unsigned i = 0; i < mREs.size(); ++i) {
86        if (!hasEndAnchor(mREs[i])) allAnchored = false;
87        mREs[i] = resolveModesAndExternalSymbols(mREs[i]);
88        mREs[i] = re::exclude_CC(mREs[i], mBreakCC);
89        mREs[i] = resolveAnchors(mREs[i], anchorRE);
90        re::gatherUnicodeProperties(mREs[i], mUnicodeProperties);
91        mREs[i] = regular_expression_passes(mREs[i]);
92    }
93    if (allAnchored && (mGrepRecordBreak != GrepRecordBreakKind::Unicode)) mMoveMatchesToEOL = false;
94
95}
96
97
98parabix::StreamSetBuffer * LZ4GrepGenerator::linefeedStreamFromDecompressedBits(parabix::StreamSetBuffer *decompressedBasisBits) {
99//    auto mGrepDriver = &mPxDriver;
100    const unsigned baseBufferSize = this->getInputBufferBlocks();
101    auto & idb = mPxDriver.getBuilder();
102    StreamSetBuffer * LineFeedStream = mPxDriver.addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(1, 1), baseBufferSize);
103    kernel::Kernel * linefeedK = mPxDriver.addKernelInstance<kernel::LineFeedKernelBuilder>(idb, Binding{idb->getStreamSetTy(8), "basis", FixedRate(), Principal()});
104    mPxDriver.makeKernelCall(linefeedK, {decompressedBasisBits}, {LineFeedStream});
105    return LineFeedStream;
106}
107
108parabix::StreamSetBuffer * LZ4GrepGenerator::convertCompressedBitsStream(parabix::StreamSetBuffer* compressedBitStream, int numberOfStream, std::string prefix) {
109    auto mGrepDriver = &mPxDriver;
110    auto & idb = mGrepDriver->getBuilder();
111
112    // Extract (Deletion)
113    this->generateCompressionMarker(idb);
114
115    StreamSetBuffer * deletedBits = mPxDriver.addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(numberOfStream), this->getInputBufferBlocks());
116    StreamSetBuffer * deletionCounts = mPxDriver.addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(), this->getInputBufferBlocks());
117
118    Kernel * delK = mPxDriver.addKernelInstance<PEXTFieldCompressKernel>(idb, 64, numberOfStream);
119    mPxDriver.makeKernelCall(delK, {compressedBitStream, mCompressionMarker}, {deletedBits, deletionCounts});
120
121    StreamSetBuffer * compressedLineStream = mPxDriver.addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(numberOfStream), this->getInputBufferBlocks());
122    Kernel * streamCompressionK = mPxDriver.addKernelInstance<StreamCompressKernel>(idb, 64, numberOfStream);
123    mPxDriver.makeKernelCall(streamCompressionK, {deletedBits, deletionCounts}, {compressedLineStream});
124
125    // Deposit
126    StreamSetBuffer * depositedBits = mPxDriver.addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(numberOfStream), this->getDecompressedBufferBlocks());
127    Kernel * bitStreamPDEPk = mPxDriver.addKernelInstance<BitStreamPDEPKernel>(idb, numberOfStream, prefix + "BitStreamPDEPKernel");
128    mPxDriver.makeKernelCall(bitStreamPDEPk, {mDepositMarker, compressedLineStream}, {depositedBits});
129
130    // Match Copy
131    StreamSetBuffer * matchCopiedBits = mPxDriver.addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(numberOfStream), this->getDecompressedBufferBlocks());
132    Kernel * bitStreamMatchCopyK = mPxDriver.addKernelInstance<LZ4BitStreamMatchCopyKernel>(idb, numberOfStream, prefix + "BitStreamMatchCopyKernel");
133    mPxDriver.makeKernelCall(bitStreamMatchCopyK, {mMatchOffsetMarker, mM0Marker, mCompressedByteStream, depositedBits}, {matchCopiedBits});
134
135    return matchCopiedBits;
136}
137
138parabix::StreamSetBuffer * LZ4GrepGenerator::linefeedStreamFromCompressedBits() {
139    auto mGrepDriver = &mPxDriver;
140    const unsigned baseBufferSize = this->getInputBufferBlocks();
141    auto & idb = mGrepDriver->getBuilder();
142
143    StreamSetBuffer * CompressedLineFeedStream = mPxDriver.addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(1, 1), baseBufferSize);
144    kernel::Kernel * linefeedK = mPxDriver.addKernelInstance<kernel::LineFeedKernelBuilder>(idb, Binding{idb->getStreamSetTy(8), "basis", FixedRate(), Principal()});
145    mPxDriver.makeKernelCall(linefeedK, {mCompressedBasisBits}, {CompressedLineFeedStream});
146    return this->convertCompressedBitsStream(CompressedLineFeedStream, 1, "LineFeed");
147}
148
149
150std::pair<parabix::StreamSetBuffer *, parabix::StreamSetBuffer *> LZ4GrepGenerator::multiplexingGrepPipeline(std::vector<re::RE *> &REs) {
151
152    this->initREs(REs);
153    auto mGrepDriver = &mPxDriver;
154
155    auto & idb = mGrepDriver->getBuilder();
156    // TODO: until we automate stream buffer sizing, use this calculation to determine how large our matches buffer needs to be.
157    const unsigned baseBufferSize = this->getInputBufferBlocks();
158    bool CC_Multiplexing = true;
159    int MaxCountFlag = 0;
160
161    //  Regular Expression Processing and Analysis Phase
162    const auto nREs = mREs.size();
163
164    std::vector<StreamSetBuffer *> MatchResultsBufs(nREs);
165
166
167
168    if (mGrepRecordBreak == GrepRecordBreakKind::LF) {
169    } else if (mGrepRecordBreak == GrepRecordBreakKind::Null) {
170        // TODO fix here
171//        kernel::Kernel * breakK = mGrepDriver->addKernelInstance<kernel::ParabixCharacterClassKernelBuilder>(idb, "Null", std::vector<re::CC *>{mBreakCC}, 8);
172//        mGrepDriver->makeKernelCall(breakK, {matchCopiedBasisBits}, {LineBreakStream});
173    }
174
175    std::map<std::string, StreamSetBuffer *> propertyStream;
176
177    std::vector<std::string> externalStreamNames;
178    std::set<re::Name *> UnicodeProperties;
179
180    const auto UnicodeSets = re::collectCCs(mREs[0], &cc::Unicode, std::set<re::Name *>({re::makeZeroWidth("\\b{g}")}));
181    StreamSetBuffer * const MatchResults = mGrepDriver->addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(1, 1), baseBufferSize);
182
183    mpx = make_unique<cc::MultiplexedAlphabet>("mpx", UnicodeSets);
184    mREs[0] = transformCCs(mpx.get(), mREs[0]);
185    std::vector<re::CC *> mpx_basis = mpx->getMultiplexedCCs();
186    auto numOfCharacterClasses = mpx_basis.size();
187    StreamSetBuffer * CharClasses = mGrepDriver->addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(numOfCharacterClasses), baseBufferSize);
188
189    kernel::Kernel * ccK = mGrepDriver->addKernelInstance<kernel::CharClassesKernel>(idb, std::move(mpx_basis));
190    mGrepDriver->makeKernelCall(ccK, {mCompressedBasisBits}, {CharClasses}); //TODO get it from compression space
191
192    StreamSetBuffer * CompressedLineFeedStream = mPxDriver.addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(1, 1), baseBufferSize);
193    kernel::Kernel * linefeedK = mPxDriver.addKernelInstance<kernel::LineFeedKernelBuilder>(idb, Binding{idb->getStreamSetTy(8), "basis", FixedRate(), Principal()});
194    mPxDriver.makeKernelCall(linefeedK, {mCompressedBasisBits}, {CompressedLineFeedStream});
195
196    StreamSetBuffer * combinedStream = mGrepDriver->addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(numOfCharacterClasses + 1), baseBufferSize);
197    kernel::Kernel* streamCombineKernel = mPxDriver.addKernelInstance<StreamsCombineKernel>(idb, std::vector<unsigned>({1, (unsigned)numOfCharacterClasses}));
198    mPxDriver.makeKernelCall(streamCombineKernel, {CompressedLineFeedStream, CharClasses}, {combinedStream});
199
200    StreamSetBuffer * decompressedCombinedStream = this->convertCompressedBitsStream(combinedStream, 1 + numOfCharacterClasses, "combined");
201    StreamSetBuffer * LineBreakStream = mGrepDriver->addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(1), baseBufferSize);
202    StreamSetBuffer * decompressedCharClasses = mGrepDriver->addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(numOfCharacterClasses), baseBufferSize);
203    kernel::Kernel* streamSplitKernel = mPxDriver.addKernelInstance<StreamsSplitKernel>(idb, std::vector<unsigned>({1, (unsigned)numOfCharacterClasses}));
204    mPxDriver.makeKernelCall(streamSplitKernel, {decompressedCombinedStream}, {LineBreakStream, decompressedCharClasses});
205
206    /*
207    StreamSetBuffer * LineBreakStream = this->convertCompressedBitsStream(CompressedLineFeedStream, 1, "LineFeed");
208    StreamSetBuffer * decompressedCharClasses = this->convertCompressedBitsStream(CharClasses, numOfCharacterClasses, "mpx");
209     */
210
211    StreamSetBuffer * fakeMatchCopiedBits = mPxDriver.addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(8), this->getInputBufferBlocks());
212    Kernel* fakeStreamGeneratorK = mPxDriver.addKernelInstance<LZ4FakeStreamGeneratingKernel>(idb, numOfCharacterClasses, 8);
213    mPxDriver.makeKernelCall(fakeStreamGeneratorK, {decompressedCharClasses}, {fakeMatchCopiedBits});
214
215    kernel::Kernel * icgrepK = mGrepDriver->addKernelInstance<kernel::ICGrepKernel>(idb, mREs[0], externalStreamNames, std::vector<cc::Alphabet *>{mpx.get()});
216    mGrepDriver->makeKernelCall(icgrepK, {fakeMatchCopiedBits, decompressedCharClasses}, {MatchResults});
217    MatchResultsBufs[0] = MatchResults;
218
219    StreamSetBuffer * MergedResults = MatchResultsBufs[0];
220    if (mREs.size() > 1) {
221        MergedResults = mGrepDriver->addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(1, 1), baseBufferSize);
222        kernel::Kernel * streamsMergeK = mGrepDriver->addKernelInstance<kernel::StreamsMerge>(idb, 1, mREs.size());
223        mGrepDriver->makeKernelCall(streamsMergeK, MatchResultsBufs, {MergedResults});
224    }
225    StreamSetBuffer * Matches = MergedResults;
226    if (mMoveMatchesToEOL) {
227        StreamSetBuffer * OriginalMatches = Matches;
228        kernel::Kernel * matchedLinesK = mGrepDriver->addKernelInstance<kernel::MatchedLinesKernel>(idb);
229        Matches = mGrepDriver->addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(1, 1), baseBufferSize);
230        mGrepDriver->makeKernelCall(matchedLinesK, {OriginalMatches, LineBreakStream}, {Matches});
231    }
232
233    if (MaxCountFlag > 0) {
234        kernel::Kernel * untilK = mGrepDriver->addKernelInstance<kernel::UntilNkernel>(idb);
235        untilK->setInitialArguments({idb->getSize(MaxCountFlag)});
236        StreamSetBuffer * const AllMatches = Matches;
237        Matches = mGrepDriver->addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(1, 1), baseBufferSize);
238        mGrepDriver->makeKernelCall(untilK, {AllMatches}, {Matches});
239    }
240
241    return std::pair<StreamSetBuffer *, StreamSetBuffer *>(LineBreakStream, Matches);
242};
243
244std::pair<parabix::StreamSetBuffer *, parabix::StreamSetBuffer *> LZ4GrepGenerator::grepPipeline(
245        std::vector<re::RE *> &REs, parabix::StreamSetBuffer *decompressedBasisBits) {
246
247    this->initREs(REs);
248    auto mGrepDriver = &mPxDriver;
249
250    auto & idb = mGrepDriver->getBuilder();
251    // TODO: until we automate stream buffer sizing, use this calculation to determine how large our matches buffer needs to be.
252    const unsigned baseBufferSize = this->getInputBufferBlocks();
253    int MaxCountFlag = 0;
254
255    //  Regular Expression Processing and Analysis Phase
256    const auto nREs = mREs.size();
257
258    StreamSetBuffer * LineBreakStream = mGrepDriver->addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(1, 1), baseBufferSize);
259    std::vector<StreamSetBuffer *> MatchResultsBufs(nREs);
260
261    StreamSetBuffer * RequiredStreams = mGrepDriver->addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(1, 1), baseBufferSize);
262    StreamSetBuffer * UnicodeLB = mGrepDriver->addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(1, 1), baseBufferSize);
263
264    StreamSetBuffer * LineFeedStream = this->linefeedStreamFromDecompressedBits(decompressedBasisBits);
265
266    kernel::Kernel * requiredStreamsK = mGrepDriver->addKernelInstance<kernel::RequiredStreams_UTF8>(idb);
267    mGrepDriver->makeKernelCall(requiredStreamsK, {decompressedBasisBits, LineFeedStream}, {RequiredStreams, UnicodeLB});
268
269    if (mGrepRecordBreak == GrepRecordBreakKind::LF) {
270        LineBreakStream = LineFeedStream;
271    } else if (mGrepRecordBreak == GrepRecordBreakKind::Null) {
272        kernel::Kernel * breakK = mGrepDriver->addKernelInstance<kernel::ParabixCharacterClassKernelBuilder>(idb, "Null", std::vector<re::CC *>{mBreakCC}, 8);
273        mGrepDriver->makeKernelCall(breakK, {decompressedBasisBits}, {LineBreakStream});
274    } else {
275        LineBreakStream = UnicodeLB;
276    }
277
278    std::map<std::string, StreamSetBuffer *> propertyStream;
279
280    for(unsigned i = 0; i < nREs; ++i) {
281        std::vector<std::string> externalStreamNames;
282        std::vector<StreamSetBuffer *> icgrepInputSets = {decompressedBasisBits};
283        if (mGrepRecordBreak == GrepRecordBreakKind::Unicode) {
284            externalStreamNames.push_back("UTF8_LB");
285            icgrepInputSets.push_back(LineBreakStream);
286            externalStreamNames.push_back("UTF8_nonfinal");
287            icgrepInputSets.push_back(RequiredStreams);
288        }
289        std::set<re::Name *> UnicodeProperties;
290
291        StreamSetBuffer * MatchResults = mGrepDriver->addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(1, 1), baseBufferSize);
292        kernel::Kernel * icgrepK = mGrepDriver->addKernelInstance<kernel::ICGrepKernel>(idb, mREs[i], externalStreamNames);
293        mGrepDriver->makeKernelCall(icgrepK, icgrepInputSets, {MatchResults});
294        MatchResultsBufs[i] = MatchResults;
295    }
296
297    StreamSetBuffer * MergedResults = MatchResultsBufs[0];
298    if (mREs.size() > 1) {
299        MergedResults = mGrepDriver->addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(1, 1), baseBufferSize);
300        kernel::Kernel * streamsMergeK = mGrepDriver->addKernelInstance<kernel::StreamsMerge>(idb, 1, mREs.size());
301        mGrepDriver->makeKernelCall(streamsMergeK, MatchResultsBufs, {MergedResults});
302    }
303    StreamSetBuffer * Matches = MergedResults;
304    if (mMoveMatchesToEOL) {
305        StreamSetBuffer * OriginalMatches = Matches;
306        kernel::Kernel * matchedLinesK = mGrepDriver->addKernelInstance<kernel::MatchedLinesKernel>(idb);
307        Matches = mGrepDriver->addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(1, 1), baseBufferSize);
308        mGrepDriver->makeKernelCall(matchedLinesK, {OriginalMatches, LineBreakStream}, {Matches});
309    }
310
311    if (MaxCountFlag > 0) {
312        kernel::Kernel * untilK = mGrepDriver->addKernelInstance<kernel::UntilNkernel>(idb);
313        untilK->setInitialArguments({idb->getSize(MaxCountFlag)});
314        StreamSetBuffer * const AllMatches = Matches;
315        Matches = mGrepDriver->addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(1, 1), baseBufferSize);
316        mGrepDriver->makeKernelCall(untilK, {AllMatches}, {Matches});
317    }
318
319    return std::pair<StreamSetBuffer *, StreamSetBuffer *>(LineBreakStream, Matches);
320
321}
322
323void LZ4GrepGenerator::invokeScanMatchGrep(char* fileBuffer, size_t blockStart, size_t blockEnd, bool hasBlockChecksum) {
324    auto main = this->getScanMatchGrepMainFunction();
325    std::ostringstream s;
326    EmitMatch accum("", false, false, s);
327
328    main(fileBuffer, blockStart, blockEnd, hasBlockChecksum, reinterpret_cast<intptr_t>(&accum));
329    llvm::outs() << s.str();
330}
331
332void LZ4GrepGenerator::generateScanMatchGrepPipeline(re::RE* regex) {
333    auto & iBuilder = mPxDriver.getBuilder();
334    this->generateScanMatchMainFunc(iBuilder);
335
336    StreamSetBuffer * const DecompressedByteStream = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 8), this->getDecompressedBufferBlocks());
337
338    // GeneratePipeline
339    this->generateLoadByteStreamAndBitStream(iBuilder);
340    this->generateExtractAndDepositMarkers(iBuilder);
341
342    auto swizzle = this->generateSwizzleExtractData(iBuilder);
343
344    StreamSetBuffer * depositedSwizzle0 = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(4), this->getDecompressedBufferBlocks(), 1);
345    StreamSetBuffer * depositedSwizzle1 = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(4), this->getDecompressedBufferBlocks(), 1);
346
347    Kernel * multiplePdepK = mPxDriver.addKernelInstance<SwizzledMultiplePDEPkernel>(iBuilder, 4, 2);
348    mPxDriver.makeKernelCall(multiplePdepK, {mDepositMarker, swizzle.first, swizzle.second}, {depositedSwizzle0, depositedSwizzle1});
349
350    StreamSetBuffer * matchCopiedSwizzle0 = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(4), this->getInputBufferBlocks(), 1);
351    StreamSetBuffer * matchCopiedSwizzle1 = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(4), this->getInputBufferBlocks(), 1);
352
353    Kernel * swizzledMatchCopyK = mPxDriver.addKernelInstance<LZ4SwizzledMatchCopyKernel>(iBuilder, 4, 2, 4);
354    mPxDriver.makeKernelCall(swizzledMatchCopyK, {mMatchOffsetMarker, mM0Marker, mCompressedByteStream, depositedSwizzle0, depositedSwizzle1}, {matchCopiedSwizzle0, matchCopiedSwizzle1});
355
356    // Produce unswizzled bit streams
357    StreamSetBuffer * extractedbits = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(8), this->getInputBufferBlocks());
358    Kernel * unSwizzleK = mPxDriver.addKernelInstance<SwizzleGenerator>(iBuilder, 8, 1, 2);
359    mPxDriver.makeKernelCall(unSwizzleK, {matchCopiedSwizzle0, matchCopiedSwizzle1}, {extractedbits});
360
361    Kernel * p2sK = mPxDriver.addKernelInstance<P2SKernel>(iBuilder);
362    mPxDriver.makeKernelCall(p2sK, {extractedbits}, {DecompressedByteStream});
363
364    StreamSetBuffer * LineBreakStream;
365    StreamSetBuffer * Matches;
366    std::vector<re::RE*> res = {regex};
367    std::tie(LineBreakStream, Matches) = grepPipeline(res, extractedbits);
368
369    kernel::Kernel * scanMatchK = mPxDriver.addKernelInstance<kernel::ScanMatchKernel>(iBuilder);
370    scanMatchK->setInitialArguments({match_accumulator});
371    mPxDriver.makeKernelCall(scanMatchK, {Matches, LineBreakStream, DecompressedByteStream}, {});
372    mPxDriver.LinkFunction(*scanMatchK, "accumulate_match_wrapper", &accumulate_match_wrapper);
373    mPxDriver.LinkFunction(*scanMatchK, "finalize_match_wrapper", &finalize_match_wrapper);
374
375    mPxDriver.generatePipelineIR();
376    mPxDriver.deallocateBuffers();
377
378    iBuilder->CreateRetVoid();
379
380    mPxDriver.finalizeObject();
381}
382
383void LZ4GrepGenerator::generateCountOnlyGrepPipeline(re::RE *regex, bool enableGather) {
384    auto & iBuilder = mPxDriver.getBuilder();
385    this->generateMainFunc(iBuilder);
386
387    // GeneratePipeline
388    this->generateLoadByteStreamAndBitStream(iBuilder);
389    this->generateExtractAndDepositMarkers(iBuilder);
390
391    StreamSetBuffer * LineBreakStream;
392    StreamSetBuffer * Matches;
393    std::vector<re::RE*> res = {regex};
394    if (mEnableMultiplexing) {
395        std::tie(LineBreakStream, Matches) = multiplexingGrepPipeline(res);
396    } else {
397        StreamSetBuffer * const extractedBits = this->generateBitStreamExtractData(iBuilder);
398        StreamSetBuffer * depositedBits = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(8), this->getDecompressedBufferBlocks());
399        Kernel * bitStreamPDEPk = enableGather ? mPxDriver.addKernelInstance<BitStreamGatherPDEPKernel>(iBuilder, 8) : mPxDriver.addKernelInstance<BitStreamPDEPKernel>(iBuilder, 8);
400        mPxDriver.makeKernelCall(bitStreamPDEPk, {mDepositMarker, extractedBits}, {depositedBits});
401
402        StreamSetBuffer * matchCopiedBits = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(8), this->getInputBufferBlocks());
403        Kernel * bitStreamMatchCopyK = mPxDriver.addKernelInstance<LZ4BitStreamMatchCopyKernel>(iBuilder, 8);
404        mPxDriver.makeKernelCall(bitStreamMatchCopyK, {mMatchOffsetMarker, mM0Marker, mCompressedByteStream, depositedBits}, {matchCopiedBits});
405
406        std::tie(LineBreakStream, Matches) = grepPipeline(res, matchCopiedBits);
407    };
408
409    kernel::Kernel * matchCountK = mPxDriver.addKernelInstance<kernel::PopcountKernel>(iBuilder);
410    mPxDriver.makeKernelCall(matchCountK, {Matches}, {});
411    mPxDriver.generatePipelineIR();
412
413    iBuilder->setKernel(matchCountK);
414    Value * matchedLineCount = iBuilder->getAccumulator("countResult");
415    matchedLineCount = iBuilder->CreateZExt(matchedLineCount, iBuilder->getInt64Ty());
416    iBuilder->CallPrintInt("aaa", matchedLineCount);
417
418    mPxDriver.deallocateBuffers();
419
420    // TODO return matchedLineCount
421//        idb->CreateRet(matchedLineCount);
422
423    iBuilder->CreateRetVoid();
424
425    mPxDriver.finalizeObject();
426}
427
428
429void LZ4GrepGenerator::generateSwizzledCountOnlyGrepPipeline(re::RE *regex) {
430    auto & iBuilder = mPxDriver.getBuilder();
431    this->generateMainFunc(iBuilder);
432
433
434    // GeneratePipeline
435    this->generateLoadByteStreamAndBitStream(iBuilder);
436    this->generateExtractAndDepositMarkers(iBuilder);
437
438    auto swizzle = this->generateSwizzleExtractData(iBuilder);
439
440    StreamSetBuffer * depositedSwizzle0 = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(4), this->getInputBufferBlocks(), 1);
441    StreamSetBuffer * depositedSwizzle1 = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(4), this->getInputBufferBlocks(), 1);
442
443    Kernel * multiplePdepK = mPxDriver.addKernelInstance<SwizzledMultiplePDEPkernel>(iBuilder, 4, 2);
444    mPxDriver.makeKernelCall(multiplePdepK, {mDepositMarker, swizzle.first, swizzle.second}, {depositedSwizzle0, depositedSwizzle1});
445
446
447    // split PDEP into 2 kernel will be a little slower in single thread environment
448/*
449    Kernel * pdep1 = mPxDriver.addKernelInstance<PDEPkernel>(iBuilder, 4);
450    mPxDriver.makeKernelCall(pdep1, {mDepositMarker, swizzle.first}, {depositedSwizzle0});
451
452    Kernel * pdep2 = mPxDriver.addKernelInstance<PDEPkernel>(iBuilder, 4);
453    mPxDriver.makeKernelCall(pdep2, {mDepositMarker, swizzle.second}, {depositedSwizzle1});
454*/
455
456    StreamSetBuffer * matchCopiedSwizzle0 = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(4), this->getInputBufferBlocks(), 1);
457    StreamSetBuffer * matchCopiedSwizzle1 = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(4), this->getInputBufferBlocks(), 1);
458
459    Kernel * swizzledMatchCopyK = mPxDriver.addKernelInstance<LZ4SwizzledMatchCopyKernel>(iBuilder, 4, 2, 4);
460    mPxDriver.makeKernelCall(swizzledMatchCopyK, {mMatchOffsetMarker, mM0Marker, mCompressedByteStream, depositedSwizzle0, depositedSwizzle1}, {matchCopiedSwizzle0, matchCopiedSwizzle1});
461
462    // Produce unswizzled bit streams
463    StreamSetBuffer * matchCopiedbits = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(8), this->getInputBufferBlocks());
464    Kernel * unSwizzleK = mPxDriver.addKernelInstance<SwizzleGenerator>(iBuilder, 8, 1, 2);
465    mPxDriver.makeKernelCall(unSwizzleK, {matchCopiedSwizzle0, matchCopiedSwizzle1}, {matchCopiedbits});
466
467    StreamSetBuffer * LineBreakStream;
468    StreamSetBuffer * Matches;
469    std::vector<re::RE*> res = {regex};
470//    if (mEnableMultiplexing) {
471//        std::tie(LineBreakStream, Matches) = multiplexingGrepPipeline(res, matchCopiedbits);
472//    } else {
473        std::tie(LineBreakStream, Matches) = grepPipeline(res, matchCopiedbits);
474//    };
475
476    kernel::Kernel * matchCountK = mPxDriver.addKernelInstance<kernel::PopcountKernel>(iBuilder);
477    mPxDriver.makeKernelCall(matchCountK, {Matches}, {});
478    mPxDriver.generatePipelineIR();
479
480    iBuilder->setKernel(matchCountK);
481    Value * matchedLineCount = iBuilder->getAccumulator("countResult");
482    matchedLineCount = iBuilder->CreateZExt(matchedLineCount, iBuilder->getInt64Ty());
483    iBuilder->CallPrintInt("aaa", matchedLineCount);
484
485    mPxDriver.deallocateBuffers();
486
487    // TODO return matchedLineCount
488//        idb->CreateRet(matchedLineCount);
489
490    iBuilder->CreateRetVoid();
491
492    mPxDriver.finalizeObject();
493}
494
495ScanMatchGrepMainFunctionType LZ4GrepGenerator::getScanMatchGrepMainFunction() {
496    return reinterpret_cast<ScanMatchGrepMainFunctionType>(mPxDriver.getMain());
497}
498
499void LZ4GrepGenerator::generateScanMatchMainFunc(const std::unique_ptr<kernel::KernelBuilder> & iBuilder) {
500    Module * M = iBuilder->getModule();
501    Type * const sizeTy = iBuilder->getSizeTy();
502    Type * const boolTy = iBuilder->getIntNTy(sizeof(bool) * 8);
503    Type * const voidTy = iBuilder->getVoidTy();
504    Type * const inputType = iBuilder->getInt8PtrTy();
505    Type * const intAddrTy = iBuilder->getIntAddrTy();
506
507    Function * const main = cast<Function>(M->getOrInsertFunction("Main", voidTy, inputType, sizeTy, sizeTy, boolTy, intAddrTy, nullptr));
508    main->setCallingConv(CallingConv::C);
509    Function::arg_iterator args = main->arg_begin();
510    mInputStream = &*(args++);
511    mInputStream->setName("input");
512
513    mHeaderSize = &*(args++);
514    mHeaderSize->setName("mHeaderSize");
515
516    mFileSize = &*(args++);
517    mFileSize->setName("mFileSize");
518
519    mHasBlockChecksum = &*(args++);
520    mHasBlockChecksum->setName("mHasBlockChecksum");
521
522    match_accumulator = &*(args++);
523    match_accumulator->setName("match_accumulator");
524
525    iBuilder->SetInsertPoint(BasicBlock::Create(M->getContext(), "entry", main, 0));
526}
Note: See TracBrowser for help on using the repository browser.