source: icGREP/icgrep-devel/icgrep/grep/grep_engine.cpp @ 5953

Last change on this file since 5953 was 5953, checked in by cameron, 14 months ago

Use AlignedAllocator when boost::align is unavailable (Jenkins test server).

File size: 40.8 KB
Line 
1/*
2 *  Copyright (c) 2018 International Characters.
3 *  This software is licensed to the public under the Open Software License 3.0.
4 *  icgrep is a trademark of International Characters.
5 */
6#include <set>
7#include "grep_engine.h"
8#include <llvm/IR/Module.h>
9#include <boost/filesystem.hpp>
10#include <UCD/resolve_properties.h>
11#include <kernels/charclasses.h>
12#include <kernels/cc_kernel.h>
13#include <kernels/grep_kernel.h>
14#include <kernels/UCD_property_kernel.h>
15#include <kernels/grapheme_kernel.h>
16#include <kernels/linebreak_kernel.h>
17#include <kernels/streams_merge.h>
18#include <kernels/source_kernel.h>
19#include <kernels/s2p_kernel.h>
20#include <kernels/scanmatchgen.h>
21#include <kernels/streamset.h>
22#include <kernels/until_n.h>
23#include <kernels/kernel_builder.h>
24#include <pablo/pablo_kernel.h>
25#include <cc/alphabet.h>
26#include <re/re_cc.h>
27#include <re/re_name.h>
28#include <re/casing.h>
29#include <re/exclude_CC.h>
30#include <re/to_utf8.h>
31#include <re/re_toolchain.h>
32#include <toolchain/toolchain.h>
33#include <re/re_analysis.h>
34#include <re/re_name_resolve.h>
35#include <re/re_name_gather.h>
36#include <re/collect_ccs.h>
37#include <re/replaceCC.h>
38#include <re/re_multiplex.h>
39#include <re/grapheme_clusters.h>
40#include <re/printer_re.h>
41#include <toolchain/toolchain.h>
42#include <toolchain/cpudriver.h>
43#include <iostream>
44#include <cc/multiplex_CCs.h>
45#include <llvm/Support/raw_ostream.h>
46#include <util/file_select.h>
47#include <util/aligned_allocator.h>
48#include <sys/stat.h>
49#include <fcntl.h>
50#include <errno.h>
51#include <llvm/ADT/STLExtras.h> // for make_unique
52#include <llvm/Support/CommandLine.h>
53#include <llvm/Support/Debug.h>
54#include <llvm/Support/Casting.h>
55#include <sched.h>
56
57using namespace parabix;
58using namespace llvm;
59using namespace cc;
60using namespace kernel;
61
62static cl::opt<int> Threads("t", cl::desc("Total number of threads."), cl::init(2));
63static cl::opt<bool> PabloTransposition("enable-pablo-s2p", cl::desc("Enable experimental pablo transposition."));
64static cl::opt<bool> CC_Multiplexing("CC-multiplexing", cl::desc("Enable CC multiplexing."), cl::init(false));
65static cl::opt<bool> PropertyKernels("enable-property-kernels", cl::desc("Enable Unicode property kernels."), cl::init(false));
66static cl::opt<bool> MultithreadedSimpleRE("enable-simple-RE-kernels", cl::desc("Enable individual CC kernels for simple REs."), cl::init(false));
67const unsigned DefaultByteCClimit = 6;
68
69static cl::opt<unsigned> ByteCClimit("byte-CC-limit", cl::desc("Max number of CCs for byte CC pipeline."), cl::init(DefaultByteCClimit));
70
71
72namespace grep {
73   
74
75extern "C" void accumulate_match_wrapper(intptr_t accum_addr, const size_t lineNum, char * line_start, char * line_end) {
76    reinterpret_cast<MatchAccumulator *>(accum_addr)->accumulate_match(lineNum, line_start, line_end);
77}
78
79extern "C" void finalize_match_wrapper(intptr_t accum_addr, char * buffer_end) {
80    reinterpret_cast<MatchAccumulator *>(accum_addr)->finalize_match(buffer_end);
81}
82   
83
// Returns ceil(log2(v)), i.e. the smallest k such that (1 << k) >= v.
// Precondition: v != 0 (checked by assert).
inline static size_t ceil_log2(const size_t v) {
    assert ("log2(0) is undefined!" && v != 0);
    static_assert(sizeof(size_t) <= sizeof(unsigned long long), "size_t must fit in unsigned long long");
    // v == 1 must be handled separately: the count-leading-zeros builtin is
    // undefined for a zero argument, and v - 1 would be zero here.
    if (v == 1) {
        return 0;
    }
    const unsigned long long widened = static_cast<unsigned long long>(v - 1);
    const size_t totalBits = sizeof(unsigned long long) * CHAR_BIT;
    return totalBits - static_cast<size_t>(__builtin_clzll(widened));
}
89
90void SearchableBuffer::addSearchCandidate(char * C_string_ptr, size_t length) {
91    if (mSpace_used + length >= mAllocated_capacity) {
92        size_t new_capacity = size_t{1} << (ceil_log2(mSpace_used + length + 1));
93        AlignedAllocator<char, BUFFER_ALIGNMENT> alloc;
94        char * new_buffer = alloc.allocate(new_capacity, 0);
95        memcpy(new_buffer, mBuffer_base, mSpace_used);
96        memset(&new_buffer[mSpace_used], 0, new_capacity-mSpace_used);
97        if (mBuffer_base != mInitial_buffer) {
98            alloc.deallocate(mBuffer_base, 0);
99        }
100        mBuffer_base = new_buffer;
101        mAllocated_capacity = new_capacity;
102    }
103    memcpy((void * ) &mBuffer_base[mSpace_used], C_string_ptr, length+1);
104    mSpace_used += length+1;
105    assert("Search candidate not null terminated" && (buffer_base[mSpace_used] == '\0'));
106    mEntries++;
107}
108
109SearchableBuffer::SearchableBuffer() :
110    mAllocated_capacity(INITIAL_CAPACITY), mBuffer_base(mInitial_buffer) {
111    memset(mBuffer_base, 0, INITIAL_CAPACITY);
112}
113
114SearchableBuffer::~SearchableBuffer() {
115    if (mBuffer_base != mInitial_buffer) {
116        AlignedAllocator<char, BUFFER_ALIGNMENT> alloc;
117        alloc.deallocate(mBuffer_base, 0);
118    }
119}
120
121   
122
123   
124void grepBuffer(re::RE * pattern, const char * search_buffer, size_t bufferLength, MatchAccumulator * accum) {
125    const unsigned segmentSize = codegen::BufferSegments * codegen::SegmentSize * codegen::ThreadNum;
126    auto segParallelModeSave = codegen::SegmentPipelineParallel;
127    codegen::SegmentPipelineParallel = false;
128   
129    pattern = resolveCaseInsensitiveMode(pattern, false);
130    pattern = regular_expression_passes(pattern);
131    pattern = re::exclude_CC(pattern, re::makeByte(0x0A));
132    pattern = resolveAnchors(pattern, re::makeByte(0x0A));
133
134    ParabixDriver pxDriver("codepointEngine");
135    auto & idb = pxDriver.getBuilder();
136    Module * M = idb->getModule();
137   
138    Function * mainFunc = cast<Function>(M->getOrInsertFunction("Main", idb->getVoidTy(), idb->getInt8PtrTy(), idb->getSizeTy(), nullptr));
139    mainFunc->setCallingConv(CallingConv::C);
140    auto args = mainFunc->arg_begin();
141    Value * const buffer = &*(args++);
142    buffer->setName("buffer");
143    Value * length = &*(args++);
144    length->setName("length");
145   
146    idb->SetInsertPoint(BasicBlock::Create(M->getContext(), "entry", mainFunc, 0));
147    StreamSetBuffer * ByteStream = pxDriver.addBuffer<SourceBuffer>(idb, idb->getStreamSetTy(1, 8));
148    kernel::Kernel * sourceK = pxDriver.addKernelInstance<kernel::MemorySourceKernel>(idb, idb->getInt8PtrTy());
149    sourceK->setInitialArguments({buffer, length});
150    pxDriver.makeKernelCall(sourceK, {}, {ByteStream});
151   
152   
153    StreamSetBuffer * BasisBits = pxDriver.addBuffer<CircularBuffer>(idb, idb->getStreamSetTy(8, 1), segmentSize);
154    kernel::Kernel * s2pk = pxDriver.addKernelInstance<kernel::S2PKernel>(idb);
155    pxDriver.makeKernelCall(s2pk, {ByteStream}, {BasisBits});
156   
157    StreamSetBuffer * LineFeedStream = pxDriver.addBuffer<CircularBuffer>(idb, idb->getStreamSetTy(1, 1), segmentSize);
158    kernel::Kernel * linefeedK = pxDriver.addKernelInstance<kernel::LineFeedKernelBuilder>(idb, Binding{idb->getStreamSetTy(8), "basis", FixedRate(), Principal()});
159    pxDriver.makeKernelCall(linefeedK, {BasisBits}, {LineFeedStream});
160   
161    StreamSetBuffer * LineBreakStream = pxDriver.addBuffer<CircularBuffer>(idb, idb->getStreamSetTy(1, 1), segmentSize);
162   
163    kernel::Kernel * requiredStreamsK = pxDriver.addKernelInstance<kernel::RequiredStreams_UTF8>(idb);
164    StreamSetBuffer * RequiredStreams = pxDriver.addBuffer<CircularBuffer>(idb, idb->getStreamSetTy(1, 1), segmentSize);
165    pxDriver.makeKernelCall(requiredStreamsK, {BasisBits, LineFeedStream}, {RequiredStreams, LineBreakStream});
166   
167    StreamSetBuffer * MatchResults = pxDriver.addBuffer<CircularBuffer>(idb, idb->getStreamSetTy(1, 1), segmentSize);
168    kernel::Kernel * icgrepK = pxDriver.addKernelInstance<kernel::ICGrepKernel>(idb, pattern, std::vector<std::string>{"UTF8_LB", "UTF8_nonfinal"});
169    pxDriver.makeKernelCall(icgrepK, {BasisBits, LineBreakStream, RequiredStreams}, {MatchResults});
170   
171    StreamSetBuffer * MatchedLines = pxDriver.addBuffer<CircularBuffer>(idb, idb->getStreamSetTy(1, 1), segmentSize);
172    kernel::Kernel * matchedLinesK = pxDriver.addKernelInstance<kernel::MatchedLinesKernel>(idb);
173    pxDriver.makeKernelCall(matchedLinesK, {MatchResults, LineBreakStream}, {MatchedLines});
174   
175    kernel::Kernel * scanMatchK = pxDriver.addKernelInstance<kernel::ScanMatchKernel>(idb);
176    scanMatchK->setInitialArguments({ConstantInt::get(idb->getIntAddrTy(), reinterpret_cast<intptr_t>(accum))});
177    pxDriver.makeKernelCall(scanMatchK, {MatchedLines, LineBreakStream, ByteStream}, {});
178    pxDriver.LinkFunction(*scanMatchK, "accumulate_match_wrapper", &accumulate_match_wrapper);
179    pxDriver.LinkFunction(*scanMatchK, "finalize_match_wrapper", &finalize_match_wrapper);
180   
181    pxDriver.generatePipelineIR();
182    pxDriver.deallocateBuffers();
183    idb->CreateRetVoid();
184    pxDriver.finalizeObject();
185   
186    typedef void (*GrepFunctionType)(const char * buffer, const size_t length);
187    auto f = reinterpret_cast<GrepFunctionType>(pxDriver.getMain());
188    f(search_buffer, bufferLength);
189    codegen::SegmentPipelineParallel = segParallelModeSave;
190}
191
192
193
194// Grep Engine construction and initialization.
195
196GrepEngine::GrepEngine() :
197    mSuppressFileMessages(false),
198    mPreferMMap(true),
199    mShowFileNames(false),
200    mStdinLabel("(stdin)"),
201    mShowLineNumbers(false),
202    mInitialTab(false),
203    mCaseInsensitive(false),
204    mInvertMatches(false),
205    mMaxCount(0),
206    mGrepDriver(nullptr),
207    mNextFileToGrep(0),
208    mNextFileToPrint(0),
209    grepMatchFound(false),
210    mGrepRecordBreak(GrepRecordBreakKind::LF),
211    mMoveMatchesToEOL(true),
212    mEngineThread(pthread_self()) {}
213
214GrepEngine::~GrepEngine() {
215    delete mGrepDriver;
216}
217
218QuietModeEngine::QuietModeEngine() : GrepEngine() {
219    mEngineKind = EngineKind::QuietMode;
220    mMoveMatchesToEOL = false;
221    mMaxCount = 1;
222}
223
224MatchOnlyEngine::MatchOnlyEngine(bool showFilesWithoutMatch, bool useNullSeparators) :
225    GrepEngine(), mRequiredCount(showFilesWithoutMatch) {
226    mEngineKind = EngineKind::MatchOnly;
227    mFileSuffix = useNullSeparators ? std::string("\0", 1) : "\n";
228    mMoveMatchesToEOL = false;
229    mMaxCount = 1;
230}
231
232CountOnlyEngine::CountOnlyEngine() : GrepEngine() {
233    mEngineKind = EngineKind::CountOnly;
234    mFileSuffix = ":";
235}
236
237EmitMatchesEngine::EmitMatchesEngine() : GrepEngine() {
238    mEngineKind = EngineKind::EmitMatches;
239    mFileSuffix = mInitialTab ? "\t:" : ":";
240}
241
242   
243void GrepEngine::setRecordBreak(GrepRecordBreakKind b) {
244    mGrepRecordBreak = b;
245}
246
247   
248
249   
250void GrepEngine::initFileResult(std::vector<std::string> & filenames) {
251    const unsigned n = filenames.size();
252    mResultStrs.resize(n);
253    mFileStatus.resize(n, FileStatus::Pending);
254    inputFiles = filenames;
255}
256
257void GrepEngine::initREs(std::vector<re::RE *> & REs) {
258    if (mGrepRecordBreak == GrepRecordBreakKind::Unicode) {
259        mBreakCC = re::makeCC(re::makeCC(0x0A, 0x0D), re::makeCC(re::makeCC(0x85), re::makeCC(0x2028, 0x2029)));
260    } else if (mGrepRecordBreak == GrepRecordBreakKind::Null) {
261        mBreakCC = re::makeByte(0);  // Null
262    } else {
263        mBreakCC = re::makeByte(0x0A); // LF
264    }
265    re::RE * anchorRE = mBreakCC;
266    if (mGrepRecordBreak == GrepRecordBreakKind::Unicode) {
267        re::Name * anchorName = re::makeName("UTF8_LB", re::Name::Type::Unicode);
268        anchorName->setDefinition(UCD::UnicodeBreakRE());
269        anchorRE = anchorName;
270    }
271   
272    mREs = REs;
273    bool allAnchored = true;
274    for(unsigned i = 0; i < mREs.size(); ++i) {
275        if (!hasEndAnchor(mREs[i])) allAnchored = false;
276        mREs[i] = resolveModesAndExternalSymbols(mREs[i], mCaseInsensitive);
277        mREs[i] = re::exclude_CC(mREs[i], mBreakCC);
278        mREs[i] = resolveAnchors(mREs[i], anchorRE);
279        re::gatherUnicodeProperties(mREs[i], mUnicodeProperties);
280        mREs[i] = regular_expression_passes(mREs[i]);
281    }
282    if (allAnchored && (mGrepRecordBreak != GrepRecordBreakKind::Unicode)) mMoveMatchesToEOL = false;
283
284}
285
286
287   
288// Code Generation
289//
290// All engines share a common pipeline to compute a stream of Matches from a given input Bytestream.
291
292unsigned LLVM_READNONE calculateMaxCountRate(const std::unique_ptr<kernel::KernelBuilder> & b) {
293    const unsigned packSize = b->getSizeTy()->getBitWidth();
294    return (packSize * packSize) / b->getBitBlockWidth();
295}
296   
297std::pair<StreamSetBuffer *, StreamSetBuffer *> GrepEngine::grepPipeline(StreamSetBuffer * ByteStream) {
298    auto & idb = mGrepDriver->getBuilder();
299    const unsigned segmentSize = codegen::SegmentSize;
300    const unsigned bufferSegments = codegen::BufferSegments * codegen::ThreadNum;
301    // TODO: until we automate stream buffer sizing, use this calculation to determine how large our matches buffer needs to be.
302    const unsigned baseBufferSize = segmentSize * (mMaxCount > 0 ? (std::max(bufferSegments, calculateMaxCountRate(idb))) : bufferSegments);
303    const unsigned encodingBits = 8;
304   
305   
306    //  Regular Expression Processing and Analysis Phase
307    const auto nREs = mREs.size();
308    bool hasGCB[nREs];
309    bool anyGCB = false;
310
311    for(unsigned i = 0; i < nREs; ++i) {
312        hasGCB[i] = hasGraphemeClusterBoundary(mREs[i]);
313        anyGCB |= hasGCB[i];
314    }
315    StreamSetBuffer * LineBreakStream = mGrepDriver->addBuffer<CircularBuffer>(idb, idb->getStreamSetTy(1, 1), baseBufferSize);
316    std::vector<StreamSetBuffer *> MatchResultsBufs(nREs);
317   
318    re::RE * prefixRE;
319    re::RE * suffixRE;
320    // For simple regular expressions with a small number of characters, we
321    // can bypass transposition and use the Direct CC compiler.
322    bool isSimple = (nREs == 1) && (mGrepRecordBreak != GrepRecordBreakKind::Unicode) && (!anyGCB);
323    if (isSimple) {
324        mREs[0] = toUTF8(mREs[0]);
325    }
326    if (isSimple && byteTestsWithinLimit(mREs[0], ByteCClimit)) {
327        std::vector<std::string> externalStreamNames;
328        std::vector<StreamSetBuffer *> icgrepInputSets = {ByteStream};
329        if (MultithreadedSimpleRE && hasTriCCwithinLimit(mREs[0], ByteCClimit, prefixRE, suffixRE)) {
330            auto CCs = re::collectCCs(prefixRE, &cc::Byte);
331            for (auto cc : CCs) {
332                auto ccName = makeName(cc);
333                mREs[0] = re::replaceCC(mREs[0], cc, ccName);
334                std::string ccNameStr = ccName->getFullName();
335                StreamSetBuffer * ccStream = mGrepDriver->addBuffer<CircularBuffer>(idb, idb->getStreamSetTy(1, 1), baseBufferSize);
336                kernel::Kernel * ccK = mGrepDriver->addKernelInstance<kernel::DirectCharacterClassKernelBuilder>(idb, ccNameStr, std::vector<re::CC *>{cc});
337                mGrepDriver->makeKernelCall(ccK, {ByteStream}, {ccStream});
338                externalStreamNames.push_back(ccNameStr);
339                icgrepInputSets.push_back(ccStream);
340            }
341        }
342        StreamSetBuffer * MatchResults = mGrepDriver->addBuffer<CircularBuffer>(idb, idb->getStreamSetTy(1, 1), baseBufferSize);
343        kernel::Kernel * icgrepK = mGrepDriver->addKernelInstance<kernel::ByteGrepKernel>(idb, mREs[0], externalStreamNames);
344        mGrepDriver->makeKernelCall(icgrepK, icgrepInputSets, {MatchResults});
345        MatchResultsBufs[0] = MatchResults;
346        kernel::Kernel * breakK = mGrepDriver->addKernelInstance<kernel::DirectCharacterClassKernelBuilder>(idb, "breakCC", std::vector<re::CC *>{mBreakCC});
347        mGrepDriver->makeKernelCall(breakK, {ByteStream}, {LineBreakStream});
348    } else if (isSimple && hasTriCCwithinLimit(mREs[0], ByteCClimit, prefixRE, suffixRE)) {
349        std::vector<std::string> externalStreamNames;
350        std::vector<StreamSetBuffer *> icgrepInputSets = {ByteStream};
351        if (MultithreadedSimpleRE) {
352            auto CCs = re::collectCCs(prefixRE, &cc::Byte);
353            for (auto cc : CCs) {
354                auto ccName = makeName(cc);
355                mREs[0] = re::replaceCC(mREs[0], cc, ccName);
356                std::string ccNameStr = ccName->getFullName();
357                StreamSetBuffer * ccStream = mGrepDriver->addBuffer<CircularBuffer>(idb, idb->getStreamSetTy(1, 1), baseBufferSize);
358                kernel::Kernel * ccK = mGrepDriver->addKernelInstance<kernel::DirectCharacterClassKernelBuilder>(idb, ccNameStr, std::vector<re::CC *>{cc});
359                mGrepDriver->makeKernelCall(ccK, {ByteStream}, {ccStream});
360                externalStreamNames.push_back(ccNameStr);
361                icgrepInputSets.push_back(ccStream);
362            }
363        }
364        StreamSetBuffer * MatchResults = mGrepDriver->addBuffer<CircularBuffer>(idb, idb->getStreamSetTy(1, 1), baseBufferSize);
365        kernel::Kernel * icgrepK = mGrepDriver->addKernelInstance<kernel::ByteBitGrepKernel>(idb, prefixRE, suffixRE, externalStreamNames);
366        mGrepDriver->makeKernelCall(icgrepK, icgrepInputSets, {MatchResults});
367        MatchResultsBufs[0] = MatchResults;
368        kernel::Kernel * breakK = mGrepDriver->addKernelInstance<kernel::DirectCharacterClassKernelBuilder>(idb, "breakCC", std::vector<re::CC *>{mBreakCC});
369        mGrepDriver->makeKernelCall(breakK, {ByteStream}, {LineBreakStream});
370    } else {
371       
372        StreamSetBuffer * BasisBits = mGrepDriver->addBuffer<CircularBuffer>(idb, idb->getStreamSetTy(encodingBits, 1), baseBufferSize);
373        kernel::Kernel * s2pk = nullptr;
374        if (PabloTransposition) {
375            s2pk = mGrepDriver->addKernelInstance<kernel::S2P_PabloKernel>(idb);
376        }
377        else {
378            s2pk = mGrepDriver->addKernelInstance<kernel::S2PKernel>(idb);
379        }
380        mGrepDriver->makeKernelCall(s2pk, {ByteStream}, {BasisBits});
381
382        StreamSetBuffer * RequiredStreams = mGrepDriver->addBuffer<CircularBuffer>(idb, idb->getStreamSetTy(1, 1), baseBufferSize);
383        StreamSetBuffer * UnicodeLB = mGrepDriver->addBuffer<CircularBuffer>(idb, idb->getStreamSetTy(1, 1), baseBufferSize);
384
385        StreamSetBuffer * LineFeedStream = mGrepDriver->addBuffer<CircularBuffer>(idb, idb->getStreamSetTy(1, 1), baseBufferSize);
386        kernel::Kernel * linefeedK = mGrepDriver->addKernelInstance<kernel::LineFeedKernelBuilder>(idb, Binding{idb->getStreamSetTy(8), "basis", FixedRate(), Principal()});
387        mGrepDriver->makeKernelCall(linefeedK, {BasisBits}, {LineFeedStream});
388       
389        kernel::Kernel * requiredStreamsK = mGrepDriver->addKernelInstance<kernel::RequiredStreams_UTF8>(idb);
390        mGrepDriver->makeKernelCall(requiredStreamsK, {BasisBits, LineFeedStream}, {RequiredStreams, UnicodeLB});
391
392        if (mGrepRecordBreak == GrepRecordBreakKind::LF) {
393            LineBreakStream = LineFeedStream;
394        } else if (mGrepRecordBreak == GrepRecordBreakKind::Null) {
395            kernel::Kernel * breakK = mGrepDriver->addKernelInstance<kernel::ParabixCharacterClassKernelBuilder>(idb, "Null", std::vector<re::CC *>{mBreakCC}, 8);
396            mGrepDriver->makeKernelCall(breakK, {BasisBits}, {LineBreakStream});
397        } else {
398            LineBreakStream = UnicodeLB;
399        }
400       
401        std::map<std::string, StreamSetBuffer *> propertyStream;
402        if (PropertyKernels) {
403            for (auto p : mUnicodeProperties) {
404                auto name = p->getFullName();
405                StreamSetBuffer * s = mGrepDriver->addBuffer<CircularBuffer>(idb, idb->getStreamSetTy(1, 1), baseBufferSize);
406                propertyStream.emplace(std::make_pair(name, s));
407                kernel::Kernel * propertyK = mGrepDriver->addKernelInstance<kernel::UnicodePropertyKernelBuilder>(idb, p);
408                mGrepDriver->makeKernelCall(propertyK, {BasisBits}, {s});
409            }
410        }
411        StreamSetBuffer * GCB_stream = nullptr;
412        if (anyGCB) {
413            GCB_stream = mGrepDriver->addBuffer<CircularBuffer>(idb, idb->getStreamSetTy(1, 1), baseBufferSize);
414            kernel::Kernel * gcbK = mGrepDriver->addKernelInstance<kernel::GraphemeClusterBreakKernel>(idb);
415            mGrepDriver->makeKernelCall(gcbK, {BasisBits, RequiredStreams}, {GCB_stream});
416        }
417
418        for(unsigned i = 0; i < nREs; ++i) {
419            std::vector<std::string> externalStreamNames;
420            std::vector<StreamSetBuffer *> icgrepInputSets = {BasisBits};
421            if (mGrepRecordBreak == GrepRecordBreakKind::Unicode) {
422                externalStreamNames.push_back("UTF8_LB");
423                icgrepInputSets.push_back(LineBreakStream);
424                externalStreamNames.push_back("UTF8_nonfinal");
425                icgrepInputSets.push_back(RequiredStreams);
426            }
427            std::set<re::Name *> UnicodeProperties;
428            if (PropertyKernels) {
429                re::gatherUnicodeProperties(mREs[i], UnicodeProperties);
430                for (auto p : UnicodeProperties) {
431                    auto name = p->getFullName();
432                    auto f = propertyStream.find(name);
433                    if (f == propertyStream.end()) report_fatal_error(name + " not found\n");
434                    externalStreamNames.push_back(name);
435                    icgrepInputSets.push_back(f->second);
436                }
437            }
438            if (hasGCB[i]) {
439                externalStreamNames.push_back("\\b{g}");
440                icgrepInputSets.push_back(GCB_stream);
441            }
442            if (CC_Multiplexing) {
443                const auto UnicodeSets = re::collectCCs(mREs[i], &cc::Unicode, std::set<re::Name *>({re::makeZeroWidth("\\b{g}")}));
444                StreamSetBuffer * const MatchResults = mGrepDriver->addBuffer<CircularBuffer>(idb, idb->getStreamSetTy(1, 1), baseBufferSize);
445                if (UnicodeSets.size() <= 1) {
446                    kernel::Kernel * icgrepK = mGrepDriver->addKernelInstance<kernel::ICGrepKernel>(idb, mREs[i], externalStreamNames);
447                    mGrepDriver->makeKernelCall(icgrepK, icgrepInputSets, {MatchResults});
448                    MatchResultsBufs[i] = MatchResults;
449                } else {
450                    mpx = make_unique<MultiplexedAlphabet>("mpx", UnicodeSets);
451                    mREs[i] = transformCCs(mpx.get(), mREs[i]);
452                    std::vector<re::CC *> mpx_basis = mpx->getMultiplexedCCs();
453                    auto numOfCharacterClasses = mpx_basis.size();
454                    StreamSetBuffer * CharClasses = mGrepDriver->addBuffer<CircularBuffer>(idb, idb->getStreamSetTy(numOfCharacterClasses), baseBufferSize);
455                    kernel::Kernel * ccK = mGrepDriver->addKernelInstance<kernel::CharClassesKernel>(idb, std::move(mpx_basis));
456                    mGrepDriver->makeKernelCall(ccK, {BasisBits}, {CharClasses});
457    //                kernel::Kernel * ccK = mGrepDriver->addKernelInstance<kernel::CharClassesKernel>(idb, std::move(mpx_basis), true);
458    //                mGrepDriver->makeKernelCall(ccK, {ByteStream}, {CharClasses});
459                    kernel::Kernel * icgrepK = mGrepDriver->addKernelInstance<kernel::ICGrepKernel>(idb, mREs[i], externalStreamNames, std::vector<cc::Alphabet *>{mpx.get()});
460                    icgrepInputSets.push_back(CharClasses);
461                    mGrepDriver->makeKernelCall(icgrepK, icgrepInputSets, {MatchResults});
462                    MatchResultsBufs[i] = MatchResults;
463                }
464            } else {
465                StreamSetBuffer * MatchResults = mGrepDriver->addBuffer<CircularBuffer>(idb, idb->getStreamSetTy(1, 1), baseBufferSize);
466                kernel::Kernel * icgrepK = mGrepDriver->addKernelInstance<kernel::ICGrepKernel>(idb, mREs[i], externalStreamNames);
467                mGrepDriver->makeKernelCall(icgrepK, icgrepInputSets, {MatchResults});
468                MatchResultsBufs[i] = MatchResults;
469            }
470        }
471    }
472
473    StreamSetBuffer * MergedResults = MatchResultsBufs[0];
474    if (mREs.size() > 1) {
475        MergedResults = mGrepDriver->addBuffer<CircularBuffer>(idb, idb->getStreamSetTy(1, 1), baseBufferSize);
476        kernel::Kernel * streamsMergeK = mGrepDriver->addKernelInstance<kernel::StreamsMerge>(idb, 1, mREs.size());
477        mGrepDriver->makeKernelCall(streamsMergeK, MatchResultsBufs, {MergedResults});
478    }
479    StreamSetBuffer * Matches = MergedResults;
480    if (mMoveMatchesToEOL) {
481        StreamSetBuffer * OriginalMatches = Matches;
482        kernel::Kernel * matchedLinesK = mGrepDriver->addKernelInstance<kernel::MatchedLinesKernel>(idb);
483        Matches = mGrepDriver->addBuffer<CircularBuffer>(idb, idb->getStreamSetTy(1, 1), baseBufferSize);
484        mGrepDriver->makeKernelCall(matchedLinesK, {OriginalMatches, LineBreakStream}, {Matches});
485    }
486    if (mInvertMatches) {
487        kernel::Kernel * invertK = mGrepDriver->addKernelInstance<kernel::InvertMatchesKernel>(idb);
488        StreamSetBuffer * OriginalMatches = Matches;
489        Matches = mGrepDriver->addBuffer<CircularBuffer>(idb, idb->getStreamSetTy(1, 1), baseBufferSize);
490        mGrepDriver->makeKernelCall(invertK, {OriginalMatches, LineBreakStream}, {Matches});
491    }
492    if (mMaxCount > 0) {
493        kernel::Kernel * untilK = mGrepDriver->addKernelInstance<kernel::UntilNkernel>(idb);
494        untilK->setInitialArguments({idb->getSize(mMaxCount)});
495        StreamSetBuffer * const AllMatches = Matches;
496        Matches = mGrepDriver->addBuffer<CircularBuffer>(idb, idb->getStreamSetTy(1, 1), baseBufferSize);
497        mGrepDriver->makeKernelCall(untilK, {AllMatches}, {Matches});
498    }
499
500    return std::pair<StreamSetBuffer *, StreamSetBuffer *>(LineBreakStream, Matches);
501}
502
503// The QuietMode, MatchOnly and CountOnly engines share a common code generation main function,
504// which returns a count of the matches found (possibly subject to a MaxCount).
505//
506
507void GrepEngine::grepCodeGen() {
508
509    assert (mGrepDriver == nullptr);
510    mGrepDriver = new ParabixDriver("engine");
511    auto & idb = mGrepDriver->getBuilder();
512    Module * M = idb->getModule();
513
514    const unsigned encodingBits = 8;
515
516    Function * mainFunc = cast<Function>(M->getOrInsertFunction("Main", idb->getInt64Ty(), idb->getInt8Ty(), idb->getInt32Ty(), nullptr));
517    mainFunc->setCallingConv(CallingConv::C);
518    idb->SetInsertPoint(BasicBlock::Create(M->getContext(), "entry", mainFunc, 0));
519    auto args = mainFunc->arg_begin();
520
521    Value * const useMMap = &*(args++);
522    useMMap->setName("useMMap");
523    Value * const fileDescriptor = &*(args++);
524    fileDescriptor->setName("fileDescriptor");
525
526    StreamSetBuffer * ByteStream = mGrepDriver->addBuffer<SourceBuffer>(idb, idb->getStreamSetTy(1, encodingBits));
527    kernel::Kernel * sourceK = mGrepDriver->addKernelInstance<kernel::FDSourceKernel>(idb);
528    sourceK->setInitialArguments({useMMap, fileDescriptor});
529    mGrepDriver->makeKernelCall(sourceK, {}, {ByteStream});
530
531    StreamSetBuffer * LineBreakStream;
532    StreamSetBuffer * Matches;
533    std::tie(LineBreakStream, Matches) = grepPipeline(ByteStream);
534
535    kernel::Kernel * matchCountK = mGrepDriver->addKernelInstance<kernel::PopcountKernel>(idb);
536    mGrepDriver->makeKernelCall(matchCountK, {Matches}, {});
537    mGrepDriver->generatePipelineIR();
538    idb->setKernel(matchCountK);
539    Value * matchedLineCount = idb->getAccumulator("countResult");
540    matchedLineCount = idb->CreateZExt(matchedLineCount, idb->getInt64Ty());
541    mGrepDriver->deallocateBuffers();
542    idb->CreateRet(matchedLineCount);
543    mGrepDriver->finalizeObject();
544}
545
546
547//
548//  Default Report Match:  lines are emitted with whatever line terminators are found in the
549//  input.  However, if the final line is not terminated, a new line is appended.
550//
551void EmitMatch::accumulate_match (const size_t lineNum, char * line_start, char * line_end) {
552    mResultStr << mLinePrefix;
553    if (mShowLineNumbers) {
554        // Internally line numbers are counted from 0.  For display, adjust
555        // the line number so that lines are numbered from 1.
556        if (mInitialTab) {
557            mResultStr << lineNum+1 << "\t:";
558        }
559        else {
560            mResultStr << lineNum+1 << ":";
561        }
562    }
563    size_t bytes = line_end - line_start + 1;
564    mResultStr.write(line_start, bytes);
565    mLineCount++;
566    unsigned last_byte = *line_end;
567    mTerminated = (last_byte >= 0x0A) && (last_byte <= 0x0D);
568    if (LLVM_UNLIKELY(!mTerminated)) {
569        if (last_byte == 0x85) {  //  Possible NEL terminator.
570            mTerminated = (bytes >= 2) && (static_cast<unsigned>(line_end[-1]) == 0xC2);
571        }
572        else {
573            // Possible LS or PS terminators.
574            mTerminated = (bytes >= 3) && (static_cast<unsigned>(line_end[-2]) == 0xE2)
575                                       && (static_cast<unsigned>(line_end[-1]) == 0x80)
576                                       && ((last_byte == 0xA8) || (last_byte == 0xA9));
577        }
578    }
579}
580
581void EmitMatch::finalize_match(char * buffer_end) {
582    if (!mTerminated) mResultStr << "\n";
583}
584
585void EmitMatchesEngine::grepCodeGen() {
586    assert (mGrepDriver == nullptr);
587    mGrepDriver = new ParabixDriver("engine");
588    auto & idb = mGrepDriver->getBuilder();
589    Module * M = idb->getModule();
590
591    const unsigned encodingBits = 8;
592
593    Function * mainFunc = cast<Function>(M->getOrInsertFunction("Main", idb->getInt64Ty(), idb->getInt8Ty(), idb->getInt32Ty(), idb->getIntAddrTy(), nullptr));
594    mainFunc->setCallingConv(CallingConv::C);
595    idb->SetInsertPoint(BasicBlock::Create(M->getContext(), "entry", mainFunc, 0));
596    auto args = mainFunc->arg_begin();
597
598    Value * const useMMap = &*(args++);
599    useMMap->setName("useMMap");
600    Value * const fileDescriptor = &*(args++);
601    fileDescriptor->setName("fileDescriptor");
602    Value * match_accumulator = &*(args++);
603    match_accumulator->setName("match_accumulator");
604
605    StreamSetBuffer * ByteStream = mGrepDriver->addBuffer<SourceBuffer>(idb, idb->getStreamSetTy(1, encodingBits));
606    kernel::Kernel * sourceK = mGrepDriver->addKernelInstance<kernel::FDSourceKernel>(idb);
607    sourceK->setInitialArguments({useMMap, fileDescriptor});
608    mGrepDriver->makeKernelCall(sourceK, {}, {ByteStream});
609
610    StreamSetBuffer * LineBreakStream;
611    StreamSetBuffer * Matches;
612    std::tie(LineBreakStream, Matches) = grepPipeline(ByteStream);
613
614    kernel::Kernel * scanMatchK = mGrepDriver->addKernelInstance<kernel::ScanMatchKernel>(idb);
615    scanMatchK->setInitialArguments({match_accumulator});
616    mGrepDriver->makeKernelCall(scanMatchK, {Matches, LineBreakStream, ByteStream}, {});
617    mGrepDriver->LinkFunction(*scanMatchK, "accumulate_match_wrapper", &accumulate_match_wrapper);
618    mGrepDriver->LinkFunction(*scanMatchK, "finalize_match_wrapper", &finalize_match_wrapper);
619
620    mGrepDriver->generatePipelineIR();
621    mGrepDriver->deallocateBuffers();
622    idb->CreateRet(idb->getInt64(0));
623    mGrepDriver->finalizeObject();
624}
625
626
627//
628//  The doGrep methods apply a GrepEngine to a single file, processing the results
629//  differently based on the engine type.
630
631uint64_t GrepEngine::doGrep(const std::string & fileName, const uint32_t fileIdx) {
632    typedef uint64_t (*GrepFunctionType)(bool useMMap, int32_t fileDescriptor);
633    using namespace boost::filesystem;
634    path p(fileName);
635    bool useMMap = mPreferMMap;
636    if (p == "-") useMMap = false;
637    if (!is_regular_file(p)) useMMap = false;
638
639    auto f = reinterpret_cast<GrepFunctionType>(mGrepDriver->getMain());
640
641    int32_t fileDescriptor = openFile(fileName, mResultStrs[fileIdx]);
642    if (fileDescriptor == -1) return 0;
643
644    uint64_t grepResult = f(useMMap, fileDescriptor);
645    close(fileDescriptor);
646    return grepResult;
647}
648
649uint64_t CountOnlyEngine::doGrep(const std::string & fileName, const uint32_t fileIdx) {
650    uint64_t grepResult = GrepEngine::doGrep(fileName, fileIdx);
651    if (mShowFileNames) mResultStrs[fileIdx] << linePrefix(fileName);
652    mResultStrs[fileIdx] << grepResult << "\n";
653    return grepResult;
654}
655
656std::string GrepEngine::linePrefix(std::string fileName) {
657    if (!mShowFileNames) return "";
658    if (fileName == "-") {
659        return mStdinLabel + mFileSuffix;
660    }
661    else {
662        return fileName + mFileSuffix;
663    }
664}
665
666uint64_t MatchOnlyEngine::doGrep(const std::string & fileName, const uint32_t fileIdx) {
667    uint64_t grepResult = GrepEngine::doGrep(fileName, fileIdx);
668    if (grepResult == mRequiredCount) {
669       mResultStrs[fileIdx] << linePrefix(fileName);
670    }
671    return grepResult;
672}
673
674uint64_t EmitMatchesEngine::doGrep(const std::string & fileName, const uint32_t fileIdx) {
675    typedef uint64_t (*GrepFunctionType)(bool useMMap, int32_t fileDescriptor, intptr_t accum_addr);
676    using namespace boost::filesystem;
677    path p(fileName);
678    bool useMMap = mPreferMMap;
679    if (p == "-") useMMap = false;
680    if (!is_regular_file(p)) useMMap = false;
681    auto f = reinterpret_cast<GrepFunctionType>(mGrepDriver->getMain());
682    int32_t fileDescriptor = openFile(fileName, mResultStrs[fileIdx]);
683    if (fileDescriptor == -1) return 0;
684    EmitMatch accum(linePrefix(fileName), mShowLineNumbers, mInitialTab, mResultStrs[fileIdx]);
685    f(useMMap, fileDescriptor, reinterpret_cast<intptr_t>(&accum));
686    close(fileDescriptor);
687    if (accum.mLineCount > 0) grepMatchFound = true;
688    return accum.mLineCount;
689}
690
691// Open a file and return its file desciptor.
692int32_t GrepEngine::openFile(const std::string & fileName, std::ostringstream & msgstrm) {
693    if (fileName == "-") {
694        return STDIN_FILENO;
695    }
696    else {
697        struct stat sb;
698        int32_t fileDescriptor = open(fileName.c_str(), O_RDONLY);
699        if (LLVM_UNLIKELY(fileDescriptor == -1)) {
700            if (!mSuppressFileMessages) {
701                if (errno == EACCES) {
702                    msgstrm << "icgrep: " << fileName << ": Permission denied.\n";
703                }
704                else if (errno == ENOENT) {
705                    msgstrm << "icgrep: " << fileName << ": No such file.\n";
706                }
707                else {
708                    msgstrm << "icgrep: " << fileName << ": Failed.\n";
709                }
710            }
711            return fileDescriptor;
712        }
713        if (stat(fileName.c_str(), &sb) == 0 && S_ISDIR(sb.st_mode)) {
714            if (!mSuppressFileMessages) {
715                msgstrm << "icgrep: " << fileName << ": Is a directory.\n";
716            }
717            close(fileDescriptor);
718            return -1;
719        }
720        return fileDescriptor;
721    }
722}
723
724// The process of searching a group of files may use a sequential or a task
725// parallel approach.
726
727void * DoGrepThreadFunction(void *args) {
728    return reinterpret_cast<GrepEngine *>(args)->DoGrepThreadMethod();
729}
730
731bool GrepEngine::searchAllFiles() {
732    const unsigned numOfThreads = std::min(static_cast<unsigned>(Threads), static_cast<unsigned>(inputFiles.size())); 
733    std::vector<pthread_t> threads(numOfThreads);
734
735    for(unsigned long i = 1; i < numOfThreads; ++i) {
736        const int rc = pthread_create(&threads[i], nullptr, DoGrepThreadFunction, (void *)this);
737        if (rc) {
738            llvm::report_fatal_error("Failed to create thread: code " + std::to_string(rc));
739        }
740    }
741    // Main thread also does the work;
742    DoGrepThreadMethod();
743    for(unsigned i = 1; i < numOfThreads; ++i) {
744        void * status = nullptr;
745        const int rc = pthread_join(threads[i], &status);
746        if (rc) {
747            llvm::report_fatal_error("Failed to join thread: code " + std::to_string(rc));
748        }
749    }
750    return grepMatchFound;
751}
752
753
754// DoGrep thread function.
755void * GrepEngine::DoGrepThreadMethod() {
756
757    unsigned fileIdx = mNextFileToGrep++;
758    while (fileIdx < inputFiles.size()) {
759        if (codegen::DebugOptionIsSet(codegen::TraceCounts)) {
760            errs() << "Tracing " << inputFiles[fileIdx] << "\n";
761        }
762        const auto grepResult = doGrep(inputFiles[fileIdx], fileIdx);
763        mFileStatus[fileIdx] = FileStatus::GrepComplete;
764        if (grepResult > 0) {
765            grepMatchFound = true;
766        }
767        if ((mEngineKind == EngineKind::QuietMode) && grepMatchFound) {
768            if (pthread_self() != mEngineThread) {
769                pthread_exit(nullptr);
770            }
771            return nullptr;
772        }
773        fileIdx = mNextFileToGrep++;
774    }
775
776    unsigned printIdx = mNextFileToPrint++;
777    while (printIdx < inputFiles.size()) {
778        const bool readyToPrint = ((printIdx == 0) || (mFileStatus[printIdx - 1] == FileStatus::PrintComplete)) && (mFileStatus[printIdx] == FileStatus::GrepComplete);
779        if (readyToPrint) {
780            const auto output = mResultStrs[printIdx].str();
781            if (!output.empty()) {
782                llvm::outs() << output;
783            }
784            mFileStatus[printIdx] = FileStatus::PrintComplete;
785            printIdx = mNextFileToPrint++;
786        } else {
787            mGrepDriver->performIncrementalCacheCleanupStep();
788        }
789        sched_yield();
790    }
791
792    if (pthread_self() != mEngineThread) {
793        pthread_exit(nullptr);
794    } else {
795        // Always perform one final cache cleanup step.
796        mGrepDriver->performIncrementalCacheCleanupStep();
797    }
798    return nullptr;
799}
800
801   
802   
803InternalSearchEngine::InternalSearchEngine() :
804    mGrepRecordBreak(GrepRecordBreakKind::LF),
805    mCaseInsensitive(false),
806    mGrepDriver(nullptr),
807    grepMatchFound(false) {}
808   
809InternalSearchEngine::~InternalSearchEngine() {
810    delete mGrepDriver;
811}
812
813void InternalSearchEngine::grepCodeGen(re::RE * matchingRE, re::RE * excludedRE, MatchAccumulator * accum) {
814    mGrepDriver = new ParabixDriver("InternalEngine");
815    auto & idb = mGrepDriver->getBuilder();
816    Module * M = idb->getModule();
817   
818    const unsigned encodingBits = 8;
819    const unsigned segmentSize = codegen::BufferSegments * codegen::SegmentSize * codegen::ThreadNum;
820    auto segParallelModeSave = codegen::SegmentPipelineParallel;
821    codegen::SegmentPipelineParallel = false;
822   
823    re::CC * breakCC = nullptr;
824    if (mGrepRecordBreak == GrepRecordBreakKind::Null) {
825        breakCC = re::makeByte(0);
826    } else {// if (mGrepRecordBreak == GrepRecordBreakKind::LF)
827        breakCC = re::makeByte(0x0A);
828    }
829   
830    if (matchingRE) {
831        matchingRE = resolveCaseInsensitiveMode(matchingRE, mCaseInsensitive);
832        matchingRE = regular_expression_passes(matchingRE);
833        matchingRE = re::exclude_CC(excludedRE, breakCC);
834        matchingRE = resolveAnchors(excludedRE, breakCC);
835    }
836   
837    if (excludedRE) {
838        excludedRE = resolveCaseInsensitiveMode(matchingRE, mCaseInsensitive);
839        excludedRE = regular_expression_passes(matchingRE);
840        excludedRE = re::exclude_CC(excludedRE, breakCC);
841        excludedRE = resolveAnchors(excludedRE, breakCC);
842    }
843   
844    Function * mainFunc = cast<Function>(M->getOrInsertFunction("Main", idb->getVoidTy(), idb->getInt8PtrTy(), idb->getSizeTy(), nullptr));
845    mainFunc->setCallingConv(CallingConv::C);
846    auto args = mainFunc->arg_begin();
847    Value * const buffer = &*(args++);
848    buffer->setName("buffer");
849    Value * length = &*(args++);
850    length->setName("length");
851   
852    idb->SetInsertPoint(BasicBlock::Create(M->getContext(), "entry", mainFunc, 0));
853    StreamSetBuffer * ByteStream = mGrepDriver->addBuffer<SourceBuffer>(idb, idb->getStreamSetTy(1, 8));
854    kernel::Kernel * sourceK = mGrepDriver->addKernelInstance<kernel::MemorySourceKernel>(idb, idb->getInt8PtrTy());
855    sourceK->setInitialArguments({buffer, length});
856    mGrepDriver->makeKernelCall(sourceK, {}, {ByteStream});
857   
858    StreamSetBuffer * BasisBits = mGrepDriver->addBuffer<CircularBuffer>(idb, idb->getStreamSetTy(8, 1), segmentSize);
859    kernel::Kernel * s2pk = mGrepDriver->addKernelInstance<kernel::S2PKernel>(idb);
860    mGrepDriver->makeKernelCall(s2pk, {ByteStream}, {BasisBits});
861   
862    StreamSetBuffer * RecordBreakStream = mGrepDriver->addBuffer<CircularBuffer>(idb, idb->getStreamSetTy(1, 1), segmentSize);
863    std::string RBname = (mGrepRecordBreak == GrepRecordBreakKind::Null) ? "Null" : "LF";
864    kernel::Kernel * breakK = mGrepDriver->addKernelInstance<kernel::ParabixCharacterClassKernelBuilder>(idb, RBname, std::vector<re::CC *>{breakCC}, 8);
865    mGrepDriver->makeKernelCall(breakK, {BasisBits}, {RecordBreakStream});
866   
867    StreamSetBuffer * MatchingRecords = mGrepDriver->addBuffer<CircularBuffer>(idb, idb->getStreamSetTy(1, 1), segmentSize);
868   
869    std::vector<std::string> externalStreamNames;
870    if (matchingRE) {
871        kernel::Kernel * includeK = mGrepDriver->addKernelInstance<kernel::ICGrepKernel>(idb, matchingRE, externalStreamNames);
872        mGrepDriver->makeKernelCall(includeK, {BasisBits}, {MatchingRecords});
873    }
874   
875    if (excludedRE) {
876        StreamSetBuffer * ExcludedRecords = mGrepDriver->addBuffer<CircularBuffer>(idb, idb->getStreamSetTy(1, 1), segmentSize);
877        kernel::Kernel * excludeK = mGrepDriver->addKernelInstance<kernel::ICGrepKernel>(idb, excludedRE, externalStreamNames);
878        mGrepDriver->makeKernelCall(excludeK, {BasisBits}, {ExcludedRecords});
879       
880        kernel::Kernel * invertK = mGrepDriver->addKernelInstance<kernel::InvertMatchesKernel>(idb);
881        if (matchingRE) {
882            StreamSetBuffer * nonExcluded = mGrepDriver->addBuffer<CircularBuffer>(idb, idb->getStreamSetTy(1, 1), segmentSize);
883            mGrepDriver->makeKernelCall(invertK, {ExcludedRecords, RecordBreakStream}, {nonExcluded});
884            StreamSetBuffer * included = MatchingRecords;
885            kernel::Kernel * streamsMergeK = mGrepDriver->addKernelInstance<kernel::StreamsMerge>(idb, 1, 2);
886            mGrepDriver->makeKernelCall(streamsMergeK, {included, nonExcluded}, {MatchingRecords});
887        }
888        else {
889            mGrepDriver->makeKernelCall(invertK, {ExcludedRecords, RecordBreakStream}, {MatchingRecords});
890        }
891    }
892   
893    kernel::Kernel * scanMatchK = mGrepDriver->addKernelInstance<kernel::ScanMatchKernel>(idb);
894    scanMatchK->setInitialArguments({ConstantInt::get(idb->getIntAddrTy(), reinterpret_cast<intptr_t>(accum))});
895    mGrepDriver->makeKernelCall(scanMatchK, {MatchingRecords, RecordBreakStream, ByteStream}, {});
896    mGrepDriver->LinkFunction(*scanMatchK, "accumulate_match_wrapper", &accumulate_match_wrapper);
897    mGrepDriver->LinkFunction(*scanMatchK, "finalize_match_wrapper", &finalize_match_wrapper);
898   
899    mGrepDriver->generatePipelineIR();
900    mGrepDriver->deallocateBuffers();
901    idb->CreateRetVoid();
902    mGrepDriver->finalizeObject();
903}
904
905void InternalSearchEngine::doGrep(const char * search_buffer, size_t bufferLength) {
906    typedef void (*GrepFunctionType)(const char * buffer, const size_t length);
907    auto f = reinterpret_cast<GrepFunctionType>(mGrepDriver->getMain());
908    f(search_buffer, bufferLength);
909}
910
911}
Note: See TracBrowser for help on using the repository browser.