source: icGREP/icgrep-devel/icgrep/grep/grep_engine.cpp @ 5897

Last change on this file since 5897 was 5897, checked in by cameron, 16 months ago

RE compiler restructuring progress

File size: 28.4 KB
Line 
1/*
2 *  Copyright (c) 2018 International Characters.
3 *  This software is licensed to the public under the Open Software License 3.0.
4 *  icgrep is a trademark of International Characters.
5 */
6#include <set>
7#include "grep_engine.h"
8#include "grep_interface.h"
9#include <llvm/IR/Module.h>
10#include <boost/filesystem.hpp>
11#include <UCD/resolve_properties.h>
12#include <kernels/charclasses.h>
13#include <kernels/cc_kernel.h>
14#include <kernels/grep_kernel.h>
15#include <kernels/UCD_property_kernel.h>
16#include <kernels/grapheme_kernel.h>
17#include <kernels/linebreak_kernel.h>
18#include <kernels/streams_merge.h>
19#include <kernels/source_kernel.h>
20#include <kernels/s2p_kernel.h>
21#include <kernels/scanmatchgen.h>
22#include <kernels/streamset.h>
23#include <kernels/until_n.h>
24#include <kernels/kernel_builder.h>
25#include <pablo/pablo_kernel.h>
26#include <re/re_cc.h>
27#include <re/re_name.h>
28#include <re/casing.h>
29#include <re/exclude_CC.h>
30#include <re/re_toolchain.h>
31#include <toolchain/toolchain.h>
32#include <re/re_name_resolve.h>
33#include <re/re_name_gather.h>
34#include <re/re_collect_unicodesets.h>
35#include <re/re_multiplex.h>
36#include <re/grapheme_clusters.h>
37#include <re/printer_re.h>
38#include <toolchain/toolchain.h>
39#include <toolchain/cpudriver.h>
40#include <iostream>
41#include <cc/multiplex_CCs.h>
42#include <llvm/Support/raw_ostream.h>
43#include <util/aligned_allocator.h>
44#include <sys/stat.h>
45#include <fcntl.h>
46#include <errno.h>
47#include <llvm/ADT/STLExtras.h> // for make_unique
48#include <llvm/Support/CommandLine.h>
49#include <llvm/Support/Debug.h>
50#include <sched.h>
51
52using namespace parabix;
53using namespace llvm;
54using namespace cc;
55using namespace kernel;
56
57static cl::opt<int> Threads("t", cl::desc("Total number of threads."), cl::init(2));
58static cl::opt<bool> PabloTransposition("enable-pablo-s2p", cl::desc("Enable experimental pablo transposition."));
59static cl::opt<bool> CC_Multiplexing("CC-multiplexing", cl::desc("Enable CC multiplexing."), cl::init(false));
60static cl::opt<bool> PropertyKernels("enable-property-kernels", cl::desc("Enable Unicode property kernels."), cl::init(false));
61
62
63namespace grep {
64   
65
66void accumulate_match_wrapper(intptr_t accum_addr, const size_t lineNum, char * line_start, char * line_end) {
67    reinterpret_cast<MatchAccumulator *>(accum_addr)->accumulate_match(lineNum, line_start, line_end);
68}
69
70void finalize_match_wrapper(intptr_t accum_addr, char * buffer_end) {
71    reinterpret_cast<MatchAccumulator *>(accum_addr)->finalize_match(buffer_end);
72}
73
74void grepBuffer(re::RE * pattern, const char * search_buffer, size_t bufferLength, MatchAccumulator * accum) {
75    const unsigned segmentSize = codegen::BufferSegments * codegen::SegmentSize * codegen::ThreadNum;
76   
77    pattern = resolveCaseInsensitiveMode(pattern, false);
78    pattern = regular_expression_passes(pattern);
79    pattern = re::exclude_CC(pattern, re::makeByte(0x0A));
80    pattern = resolveAnchors(pattern, re::makeByte(0x0A));
81
82    ParabixDriver pxDriver("codepointEngine");
83    auto & idb = pxDriver.getBuilder();
84    Module * M = idb->getModule();
85   
86    Function * mainFunc = cast<Function>(M->getOrInsertFunction("Main", idb->getVoidTy(), idb->getInt8PtrTy(), idb->getSizeTy(), nullptr));
87    mainFunc->setCallingConv(CallingConv::C);
88    auto args = mainFunc->arg_begin();
89    Value * const buffer = &*(args++);
90    buffer->setName("buffer");
91    Value * length = &*(args++);
92    length->setName("length");
93   
94    idb->SetInsertPoint(BasicBlock::Create(M->getContext(), "entry", mainFunc, 0));
95   
96    StreamSetBuffer * ByteStream = pxDriver.addBuffer<SourceBuffer>(idb, idb->getStreamSetTy(1, 8));
97    kernel::Kernel * sourceK = pxDriver.addKernelInstance<kernel::MemorySourceKernel>(idb, idb->getInt8PtrTy());
98    sourceK->setInitialArguments({buffer, length});
99    pxDriver.makeKernelCall(sourceK, {}, {ByteStream});
100   
101   
102    StreamSetBuffer * BasisBits = pxDriver.addBuffer<CircularBuffer>(idb, idb->getStreamSetTy(8, 1), segmentSize);
103    kernel::Kernel * s2pk = pxDriver.addKernelInstance<kernel::S2PKernel>(idb);
104    pxDriver.makeKernelCall(s2pk, {ByteStream}, {BasisBits});
105   
106    StreamSetBuffer * LineFeedStream = pxDriver.addBuffer<CircularBuffer>(idb, idb->getStreamSetTy(1, 1), segmentSize);
107    kernel::Kernel * linefeedK = pxDriver.addKernelInstance<kernel::LineFeedKernelBuilder>(idb, Binding{idb->getStreamSetTy(8), "basis", FixedRate(), Principal()});
108    pxDriver.makeKernelCall(linefeedK, {BasisBits}, {LineFeedStream});
109   
110    StreamSetBuffer * LineBreakStream = pxDriver.addBuffer<CircularBuffer>(idb, idb->getStreamSetTy(1, 1), segmentSize);
111   
112    kernel::Kernel * requiredStreamsK = pxDriver.addKernelInstance<kernel::RequiredStreams_UTF8>(idb);
113    StreamSetBuffer * RequiredStreams = pxDriver.addBuffer<CircularBuffer>(idb, idb->getStreamSetTy(1, 1), segmentSize);
114    pxDriver.makeKernelCall(requiredStreamsK, {BasisBits, LineFeedStream}, {RequiredStreams, LineBreakStream});
115   
116    StreamSetBuffer * MatchResults = pxDriver.addBuffer<CircularBuffer>(idb, idb->getStreamSetTy(1, 1), segmentSize);
117    kernel::Kernel * icgrepK = pxDriver.addKernelInstance<kernel::ICGrepKernel>(idb, pattern, std::vector<std::string>{"UTF8_LB", "UTF8_nonfinal"});
118    pxDriver.makeKernelCall(icgrepK, {BasisBits, LineBreakStream, RequiredStreams}, {MatchResults});
119   
120    StreamSetBuffer * MatchedLines = pxDriver.addBuffer<CircularBuffer>(idb, idb->getStreamSetTy(1, 1), segmentSize);
121    kernel::Kernel * matchedLinesK = pxDriver.addKernelInstance<kernel::MatchedLinesKernel>(idb);
122    pxDriver.makeKernelCall(matchedLinesK, {MatchResults, LineBreakStream}, {MatchedLines});
123   
124    kernel::Kernel * scanMatchK = pxDriver.addKernelInstance<kernel::ScanMatchKernel>(idb);
125    scanMatchK->setInitialArguments({ConstantInt::get(idb->getIntAddrTy(), reinterpret_cast<intptr_t>(accum))});
126    pxDriver.makeKernelCall(scanMatchK, {MatchedLines, LineBreakStream, ByteStream}, {});
127    pxDriver.LinkFunction(*scanMatchK, "accumulate_match_wrapper", &accumulate_match_wrapper);
128    pxDriver.LinkFunction(*scanMatchK, "finalize_match_wrapper", &finalize_match_wrapper);
129   
130    pxDriver.generatePipelineIR();
131    pxDriver.deallocateBuffers();
132    idb->CreateRetVoid();
133    pxDriver.finalizeObject();
134   
135    typedef void (*GrepFunctionType)(const char * buffer, const size_t length);
136    auto f = reinterpret_cast<GrepFunctionType>(pxDriver.getMain());
137    f(search_buffer, bufferLength);
138}
139
140
141
142// Grep Engine construction and initialization.
143
144GrepEngine::GrepEngine() :
145    mGrepDriver(nullptr),
146    mNextFileToGrep(0),
147    mNextFileToPrint(0),
148    grepMatchFound(false),
149    mGrepRecordBreak(GrepRecordBreakKind::Unicode),
150    mMoveMatchesToEOL(true),
151    mEngineThread(pthread_self()) {}
152
153GrepEngine::~GrepEngine() {
154    delete mGrepDriver;
155}
156
157QuietModeEngine::QuietModeEngine() : GrepEngine() {
158    mMoveMatchesToEOL = false;
159}
160
161MatchOnlyEngine::MatchOnlyEngine(bool showFilesWithoutMatch) :
162    GrepEngine(), mRequiredCount(showFilesWithoutMatch) {
163    mFileSuffix = NullFlag ? std::string("\0", 1) : "\n";
164    mMoveMatchesToEOL = false;
165}
166
167CountOnlyEngine::CountOnlyEngine() : GrepEngine() {
168    mFileSuffix = ":";
169}
170
171EmitMatchesEngine::EmitMatchesEngine() : GrepEngine() {
172    mFileSuffix = InitialTabFlag ? "\t:" : ":";
173    if (LineRegexpFlag) mMoveMatchesToEOL = false;
174}
175
176   
177void GrepEngine::setRecordBreak(GrepRecordBreakKind b) {
178    mGrepRecordBreak = b;
179}
180
181void GrepEngine::initFileResult(std::vector<std::string> & filenames) {
182    const unsigned n = filenames.size();
183    mResultStrs.resize(n);
184    mFileStatus.resize(n, FileStatus::Pending);
185    inputFiles = filenames;
186}
187
188// Code Generation
189//
190// All engines share a common pipeline to compute a stream of Matches from a given input Bytestream.
191
192unsigned LLVM_READNONE calculateMaxCountRate(const std::unique_ptr<kernel::KernelBuilder> & b) {
193    const unsigned packSize = b->getSizeTy()->getBitWidth();
194    return (packSize * packSize) / b->getBitBlockWidth();
195}
196
197std::pair<StreamSetBuffer *, StreamSetBuffer *> GrepEngine::grepPipeline(std::vector<re::RE *> & REs, StreamSetBuffer * ByteStream) {
198    auto & idb = mGrepDriver->getBuilder();
199    const unsigned segmentSize = codegen::SegmentSize;
200    const unsigned bufferSegments = codegen::BufferSegments * codegen::ThreadNum;
201    // TODO: until we automate stream buffer sizing, use this calculation to determine how large our matches buffer needs to be.
202    const unsigned baseBufferSize = segmentSize * (MaxCountFlag > 0 ? (std::max(bufferSegments, calculateMaxCountRate(idb))) : bufferSegments);
203    const unsigned encodingBits = 8;
204   
205   
206    //  Regular Expression Processing and Analysis Phase
207    const auto nREs = REs.size();
208    bool hasGCB[nREs];
209    bool anyGCB = false;
210   
211    std::set<re::Name *> UnicodeProperties;
212   
213    re::CC * breakCC = nullptr;
214    std::string breakName;
215    if (mGrepRecordBreak == GrepRecordBreakKind::Unicode) {
216        breakCC = re::makeCC(re::makeCC(0x0A, 0x0D), re::makeCC(re::makeCC(0x85), re::makeCC(0x2028, 0x2029)));
217    } else if (mGrepRecordBreak == GrepRecordBreakKind::Null) {
218        breakCC = re::makeByte(0);  // Null
219    } else {
220        breakCC = re::makeByte(0x0A); // LF
221    }
222    re::RE * anchorRE = breakCC;
223    if (mGrepRecordBreak == GrepRecordBreakKind::Unicode) {
224        re::Name * anchorName = re::makeName("UTF8_LB", re::Name::Type::Unicode);
225        anchorName->setDefinition(UCD::UnicodeBreakRE());
226        anchorRE = anchorName;
227    }
228
229    for(unsigned i = 0; i < nREs; ++i) {
230        REs[i] = resolveModesAndExternalSymbols(REs[i]);
231        REs[i] = re::exclude_CC(REs[i], breakCC);
232        REs[i] = resolveAnchors(REs[i], anchorRE);
233        re::gatherUnicodeProperties(REs[i], UnicodeProperties);
234        REs[i] = regular_expression_passes(REs[i]);
235        hasGCB[i] = hasGraphemeClusterBoundary(REs[i]);
236        anyGCB |= hasGCB[i];
237    }
238   
239    StreamSetBuffer * BasisBits = mGrepDriver->addBuffer<CircularBuffer>(idb, idb->getStreamSetTy(encodingBits, 1), baseBufferSize);
240    kernel::Kernel * s2pk = nullptr;
241    if (PabloTransposition) {
242        s2pk = mGrepDriver->addKernelInstance<kernel::S2P_PabloKernel>(idb);
243    }
244    else {
245        s2pk = mGrepDriver->addKernelInstance<kernel::S2PKernel>(idb);
246    }
247    mGrepDriver->makeKernelCall(s2pk, {ByteStream}, {BasisBits});
248
249    StreamSetBuffer * LineBreakStream = mGrepDriver->addBuffer<CircularBuffer>(idb, idb->getStreamSetTy(1, 1), baseBufferSize);
250    StreamSetBuffer * RequiredStreams = mGrepDriver->addBuffer<CircularBuffer>(idb, idb->getStreamSetTy(1, 1), baseBufferSize);
251    StreamSetBuffer * UnicodeLB = mGrepDriver->addBuffer<CircularBuffer>(idb, idb->getStreamSetTy(1, 1), baseBufferSize);
252
253    StreamSetBuffer * LineFeedStream = mGrepDriver->addBuffer<CircularBuffer>(idb, idb->getStreamSetTy(1, 1), baseBufferSize);
254    kernel::Kernel * linefeedK = mGrepDriver->addKernelInstance<kernel::LineFeedKernelBuilder>(idb, Binding{idb->getStreamSetTy(8), "basis", FixedRate(), Principal()});
255    mGrepDriver->makeKernelCall(linefeedK, {BasisBits}, {LineFeedStream});
256   
257    kernel::Kernel * requiredStreamsK = mGrepDriver->addKernelInstance<kernel::RequiredStreams_UTF8>(idb);
258    mGrepDriver->makeKernelCall(requiredStreamsK, {BasisBits, LineFeedStream}, {RequiredStreams, UnicodeLB});
259
260    if (mGrepRecordBreak == GrepRecordBreakKind::LF) {
261        LineBreakStream = LineFeedStream;
262    } else if (mGrepRecordBreak == GrepRecordBreakKind::Null) {
263        kernel::Kernel * breakK = mGrepDriver->addKernelInstance<kernel::ParabixCharacterClassKernelBuilder>(idb, "Null", std::vector<re::CC *>{breakCC}, 8);
264        mGrepDriver->makeKernelCall(breakK, {BasisBits}, {LineBreakStream});
265    } else {
266        LineBreakStream = UnicodeLB;
267    }
268   
269    std::map<std::string, StreamSetBuffer *> propertyStream;
270    if (PropertyKernels) {
271        for (auto p : UnicodeProperties) {
272            auto name = p->getFullName();
273            StreamSetBuffer * s = mGrepDriver->addBuffer<CircularBuffer>(idb, idb->getStreamSetTy(1, 1), baseBufferSize);
274            propertyStream.emplace(std::make_pair(name, s));
275            kernel::Kernel * propertyK = mGrepDriver->addKernelInstance<kernel::UnicodePropertyKernelBuilder>(idb, p);
276            mGrepDriver->makeKernelCall(propertyK, {BasisBits}, {s});
277        }
278    }
279    StreamSetBuffer * GCB_stream = nullptr;
280    if (anyGCB) {
281        GCB_stream = mGrepDriver->addBuffer<CircularBuffer>(idb, idb->getStreamSetTy(1, 1), baseBufferSize);
282        kernel::Kernel * gcbK = mGrepDriver->addKernelInstance<kernel::GraphemeClusterBreakKernel>(idb);
283        mGrepDriver->makeKernelCall(gcbK, {BasisBits, RequiredStreams}, {GCB_stream});
284    }
285
286    std::vector<StreamSetBuffer *> MatchResultsBufs(nREs);
287    for(unsigned i = 0; i < nREs; ++i) {
288        std::vector<std::string> externalStreamNames = std::vector<std::string>{"UTF8_LB", "UTF8_nonfinal"};
289        std::vector<StreamSetBuffer *> icgrepInputSets = {BasisBits, LineBreakStream, RequiredStreams};
290        std::set<re::Name *> UnicodeProperties;
291        if (PropertyKernels) {
292            re::gatherUnicodeProperties(REs[i], UnicodeProperties);
293            for (auto p : UnicodeProperties) {
294                auto name = p->getFullName();
295                auto f = propertyStream.find(name);
296                if (f == propertyStream.end()) report_fatal_error(name + " not found\n");
297                externalStreamNames.push_back(name);
298                icgrepInputSets.push_back(f->second);
299            }
300        }
301        if (hasGCB[i]) {
302            externalStreamNames.push_back("\\b{g}");
303            icgrepInputSets.push_back(GCB_stream);
304        }
305        if (CC_Multiplexing) {
306            const auto UnicodeSets = re::collectUnicodeSets(REs[i], std::set<re::Name *>({re::makeZeroWidth("\\b{g}")}));
307            StreamSetBuffer * const MatchResults = mGrepDriver->addBuffer<CircularBuffer>(idb, idb->getStreamSetTy(1, 1), baseBufferSize);
308            if (UnicodeSets.size() <= 1) {
309                kernel::Kernel * icgrepK = mGrepDriver->addKernelInstance<kernel::ICGrepKernel>(idb, REs[i], externalStreamNames);
310                mGrepDriver->makeKernelCall(icgrepK, icgrepInputSets, {MatchResults});
311                MatchResultsBufs[i] = MatchResults;
312            } else {
313                mpx = make_unique<MultiplexedAlphabet>("mpx", UnicodeSets);
314                REs[i] = transformCCs(mpx.get(), REs[i]);
315                std::vector<re::CC *> mpx_basis = mpx->getMultiplexedCCs();
316                auto numOfCharacterClasses = mpx_basis.size();
317                StreamSetBuffer * CharClasses = mGrepDriver->addBuffer<CircularBuffer>(idb, idb->getStreamSetTy(numOfCharacterClasses), baseBufferSize);
318                kernel::Kernel * ccK = mGrepDriver->addKernelInstance<kernel::CharClassesKernel>(idb, std::move(mpx_basis));
319                mGrepDriver->makeKernelCall(ccK, {BasisBits}, {CharClasses});
320//                kernel::Kernel * ccK = mGrepDriver->addKernelInstance<kernel::CharClassesKernel>(idb, std::move(mpx_basis), true);
321//                mGrepDriver->makeKernelCall(ccK, {ByteStream}, {CharClasses});
322                kernel::Kernel * icgrepK = mGrepDriver->addKernelInstance<kernel::ICGrepKernel>(idb, REs[i], externalStreamNames, std::vector<cc::Alphabet *>{mpx.get()});
323                icgrepInputSets.push_back(CharClasses);
324                mGrepDriver->makeKernelCall(icgrepK, icgrepInputSets, {MatchResults});
325                MatchResultsBufs[i] = MatchResults;
326            }
327        } else {
328            StreamSetBuffer * MatchResults = mGrepDriver->addBuffer<CircularBuffer>(idb, idb->getStreamSetTy(1, 1), baseBufferSize);
329            kernel::Kernel * icgrepK = mGrepDriver->addKernelInstance<kernel::ICGrepKernel>(idb, REs[i], externalStreamNames);
330            mGrepDriver->makeKernelCall(icgrepK, icgrepInputSets, {MatchResults});
331            MatchResultsBufs[i] = MatchResults;
332        }
333    }
334    StreamSetBuffer * MergedResults = MatchResultsBufs[0];
335    if (REs.size() > 1) {
336        MergedResults = mGrepDriver->addBuffer<CircularBuffer>(idb, idb->getStreamSetTy(1, 1), baseBufferSize);
337        kernel::Kernel * streamsMergeK = mGrepDriver->addKernelInstance<kernel::StreamsMerge>(idb, 1, REs.size());
338        mGrepDriver->makeKernelCall(streamsMergeK, MatchResultsBufs, {MergedResults});
339    }
340    StreamSetBuffer * Matches = MergedResults;
341
342    if (mMoveMatchesToEOL) {
343        StreamSetBuffer * OriginalMatches = Matches;
344        kernel::Kernel * matchedLinesK = mGrepDriver->addKernelInstance<kernel::MatchedLinesKernel>(idb);
345        Matches = mGrepDriver->addBuffer<CircularBuffer>(idb, idb->getStreamSetTy(1, 1), baseBufferSize);
346        mGrepDriver->makeKernelCall(matchedLinesK, {OriginalMatches, LineBreakStream}, {Matches});
347    }
348
349    if (InvertMatchFlag) {
350        kernel::Kernel * invertK = mGrepDriver->addKernelInstance<kernel::InvertMatchesKernel>(idb);
351        StreamSetBuffer * OriginalMatches = Matches;
352        Matches = mGrepDriver->addBuffer<CircularBuffer>(idb, idb->getStreamSetTy(1, 1), baseBufferSize);
353        mGrepDriver->makeKernelCall(invertK, {OriginalMatches, LineBreakStream}, {Matches});
354    }
355    if (MaxCountFlag > 0) {
356        kernel::Kernel * untilK = mGrepDriver->addKernelInstance<kernel::UntilNkernel>(idb);
357        untilK->setInitialArguments({idb->getSize(MaxCountFlag)});
358        StreamSetBuffer * const AllMatches = Matches;
359        Matches = mGrepDriver->addBuffer<CircularBuffer>(idb, idb->getStreamSetTy(1, 1), baseBufferSize);
360        mGrepDriver->makeKernelCall(untilK, {AllMatches}, {Matches});
361    }
362    return std::pair<StreamSetBuffer *, StreamSetBuffer *>(LineBreakStream, Matches);
363}
364
365// The QuietMode, MatchOnly and CountOnly engines share a common code generation main function,
366// which returns a count of the matches found (possibly subject to a MaxCount).
367//
368
369void GrepEngine::grepCodeGen(std::vector<re::RE *> REs) {
370
371    assert (mGrepDriver == nullptr);
372    mGrepDriver = new ParabixDriver("engine");
373    auto & idb = mGrepDriver->getBuilder();
374    Module * M = idb->getModule();
375
376    const unsigned encodingBits = 8;
377
378    Function * mainFunc = cast<Function>(M->getOrInsertFunction("Main", idb->getInt64Ty(), idb->getInt32Ty(), nullptr));
379    mainFunc->setCallingConv(CallingConv::C);
380    idb->SetInsertPoint(BasicBlock::Create(M->getContext(), "entry", mainFunc, 0));
381    auto args = mainFunc->arg_begin();
382
383    Value * const fileDescriptor = &*(args++);
384    fileDescriptor->setName("fileDescriptor");
385
386    StreamSetBuffer * ByteStream = mGrepDriver->addBuffer<SourceBuffer>(idb, idb->getStreamSetTy(1, encodingBits));
387    kernel::Kernel * sourceK = mGrepDriver->addKernelInstance<kernel::FDSourceKernel>(idb);
388    sourceK->setInitialArguments({fileDescriptor});
389    mGrepDriver->makeKernelCall(sourceK, {}, {ByteStream});
390
391    StreamSetBuffer * LineBreakStream;
392    StreamSetBuffer * Matches;
393    std::tie(LineBreakStream, Matches) = grepPipeline(REs, ByteStream);
394
395    kernel::Kernel * matchCountK = mGrepDriver->addKernelInstance<kernel::PopcountKernel>(idb);
396    mGrepDriver->makeKernelCall(matchCountK, {Matches}, {});
397    mGrepDriver->generatePipelineIR();
398    idb->setKernel(matchCountK);
399    Value * matchedLineCount = idb->getAccumulator("countResult");
400    matchedLineCount = idb->CreateZExt(matchedLineCount, idb->getInt64Ty());
401    mGrepDriver->deallocateBuffers();
402    idb->CreateRet(matchedLineCount);
403    mGrepDriver->finalizeObject();
404}
405
406//
// The EmitMatches engine uses an EmitMatch accumulator object to concatenate
// together the matched lines.
409
410class EmitMatch : public MatchAccumulator {
411    friend class EmitMatchesEngine;
412public:
413    EmitMatch(std::string linePrefix, std::ostringstream & strm) : mLinePrefix(linePrefix), mLineCount(0), mTerminated(true), mResultStr(strm) {}
414    void accumulate_match(const size_t lineNum, char * line_start, char * line_end) override;
415    void finalize_match(char * buffer_end) override;
416protected:
417    std::string mLinePrefix;
418    size_t mLineCount;
419    bool mTerminated;
420    std::ostringstream & mResultStr;
421};
422
423//
424//  Default Report Match:  lines are emitted with whatever line terminators are found in the
425//  input.  However, if the final line is not terminated, a new line is appended.
426//
427void EmitMatch::accumulate_match (const size_t lineNum, char * line_start, char * line_end) {
428    if (WithFilenameFlag) {
429        mResultStr << mLinePrefix;
430    }
431    if (LineNumberFlag) {
432        // Internally line numbers are counted from 0.  For display, adjust
433        // the line number so that lines are numbered from 1.
434        if (InitialTabFlag) {
435            mResultStr << lineNum+1 << "\t:";
436        }
437        else {
438            mResultStr << lineNum+1 << ":";
439        }
440    }
441    size_t bytes = line_end - line_start + 1;
442    mResultStr.write(line_start, bytes);
443    mLineCount++;
444    unsigned last_byte = *line_end;
445    mTerminated = (last_byte >= 0x0A) && (last_byte <= 0x0D);
446    if (LLVM_UNLIKELY(!mTerminated)) {
447        if (last_byte == 0x85) {  //  Possible NEL terminator.
448            mTerminated = (bytes >= 2) && (static_cast<unsigned>(line_end[-1]) == 0xC2);
449        }
450        else {
451            // Possible LS or PS terminators.
452            mTerminated = (bytes >= 3) && (static_cast<unsigned>(line_end[-2]) == 0xE2)
453                                       && (static_cast<unsigned>(line_end[-1]) == 0x80)
454                                       && ((last_byte == 0xA8) || (last_byte == 0xA9));
455        }
456    }
457}
458
459void EmitMatch::finalize_match(char * buffer_end) {
460    if (!mTerminated) mResultStr << "\n";
461}
462
463void EmitMatchesEngine::grepCodeGen(std::vector<re::RE *> REs) {
464    assert (mGrepDriver == nullptr);
465    mGrepDriver = new ParabixDriver("engine");
466    auto & idb = mGrepDriver->getBuilder();
467    Module * M = idb->getModule();
468
469    const unsigned encodingBits = 8;
470
471    Function * mainFunc = cast<Function>(M->getOrInsertFunction("Main", idb->getInt64Ty(), idb->getInt32Ty(), idb->getIntAddrTy(), nullptr));
472    mainFunc->setCallingConv(CallingConv::C);
473    idb->SetInsertPoint(BasicBlock::Create(M->getContext(), "entry", mainFunc, 0));
474    auto args = mainFunc->arg_begin();
475
476    Value * const fileDescriptor = &*(args++);
477    fileDescriptor->setName("fileDescriptor");
478    Value * match_accumulator = &*(args++);
479    match_accumulator->setName("match_accumulator");
480
481    StreamSetBuffer * ByteStream = mGrepDriver->addBuffer<SourceBuffer>(idb, idb->getStreamSetTy(1, encodingBits));
482    kernel::Kernel * sourceK = mGrepDriver->addKernelInstance<kernel::FDSourceKernel>(idb);
483    sourceK->setInitialArguments({fileDescriptor});
484    mGrepDriver->makeKernelCall(sourceK, {}, {ByteStream});
485
486    StreamSetBuffer * LineBreakStream;
487    StreamSetBuffer * Matches;
488    std::tie(LineBreakStream, Matches) = grepPipeline(REs, ByteStream);
489
490    kernel::Kernel * scanMatchK = mGrepDriver->addKernelInstance<kernel::ScanMatchKernel>(idb);
491    scanMatchK->setInitialArguments({match_accumulator});
492    mGrepDriver->makeKernelCall(scanMatchK, {Matches, LineBreakStream, ByteStream}, {});
493    mGrepDriver->LinkFunction(*scanMatchK, "accumulate_match_wrapper", &accumulate_match_wrapper);
494    mGrepDriver->LinkFunction(*scanMatchK, "finalize_match_wrapper", &finalize_match_wrapper);
495
496    mGrepDriver->generatePipelineIR();
497    mGrepDriver->deallocateBuffers();
498    idb->CreateRet(idb->getInt64(0));
499    mGrepDriver->finalizeObject();
500}
501
502
503//
504//  The doGrep methods apply a GrepEngine to a single file, processing the results
505//  differently based on the engine type.
506
507uint64_t GrepEngine::doGrep(const std::string & fileName, const uint32_t fileIdx) {
508    typedef uint64_t (*GrepFunctionType)(int32_t fileDescriptor);
509    auto f = reinterpret_cast<GrepFunctionType>(mGrepDriver->getMain());
510
511    int32_t fileDescriptor = openFile(fileName, mResultStrs[fileIdx]);
512    if (fileDescriptor == -1) return 0;
513
514    uint64_t grepResult = f(fileDescriptor);
515    close(fileDescriptor);
516    return grepResult;
517}
518
519uint64_t CountOnlyEngine::doGrep(const std::string & fileName, const uint32_t fileIdx) {
520    uint64_t grepResult = GrepEngine::doGrep(fileName, fileIdx);
521    if (WithFilenameFlag) mResultStrs[fileIdx] << linePrefix(fileName);
522    mResultStrs[fileIdx] << grepResult << "\n";
523    return grepResult;
524}
525
526std::string GrepEngine::linePrefix(std::string fileName) {
527    if (fileName == "-") {
528        return LabelFlag + mFileSuffix;
529    }
530    else {
531        return fileName + mFileSuffix;
532    }
533}
534
535uint64_t MatchOnlyEngine::doGrep(const std::string & fileName, const uint32_t fileIdx) {
536    uint64_t grepResult = GrepEngine::doGrep(fileName, fileIdx);
537    if (grepResult == mRequiredCount) {
538       mResultStrs[fileIdx] << linePrefix(fileName);
539    }
540    return grepResult;
541}
542
543uint64_t EmitMatchesEngine::doGrep(const std::string & fileName, const uint32_t fileIdx) {
544    typedef uint64_t (*GrepFunctionType)(int32_t fileDescriptor, intptr_t accum_addr);
545    auto f = reinterpret_cast<GrepFunctionType>(mGrepDriver->getMain());
546
547    int32_t fileDescriptor = openFile(fileName, mResultStrs[fileIdx]);
548    if (fileDescriptor == -1) return 0;
549    EmitMatch accum(linePrefix(fileName), mResultStrs[fileIdx]);
550    f(fileDescriptor, reinterpret_cast<intptr_t>(&accum));
551    close(fileDescriptor);
552    if (accum.mLineCount > 0) grepMatchFound = true;
553    return accum.mLineCount;
554}
555
556// Open a file and return its file desciptor.
557int32_t GrepEngine::openFile(const std::string & fileName, std::ostringstream & msgstrm) {
558    if (fileName == "-") {
559        return STDIN_FILENO;
560    }
561    else {
562        struct stat sb;
563        int32_t fileDescriptor = open(fileName.c_str(), O_RDONLY);
564        if (LLVM_UNLIKELY(fileDescriptor == -1)) {
565            if (!NoMessagesFlag) {
566                if (errno == EACCES) {
567                    msgstrm << "icgrep: " << fileName << ": Permission denied.\n";
568                }
569                else if (errno == ENOENT) {
570                    msgstrm << "icgrep: " << fileName << ": No such file.\n";
571                }
572                else {
573                    msgstrm << "icgrep: " << fileName << ": Failed.\n";
574                }
575            }
576            return fileDescriptor;
577        }
578        if (stat(fileName.c_str(), &sb) == 0 && S_ISDIR(sb.st_mode)) {
579            if (!NoMessagesFlag) {
580                msgstrm << "icgrep: " << fileName << ": Is a directory.\n";
581            }
582            close(fileDescriptor);
583            return -1;
584        }
585        return fileDescriptor;
586    }
587}
588
589// The process of searching a group of files may use a sequential or a task
590// parallel approach.
591
592void * DoGrepThreadFunction(void *args) {
593    return reinterpret_cast<GrepEngine *>(args)->DoGrepThreadMethod();
594}
595
596bool GrepEngine::searchAllFiles() {
597    const unsigned numOfThreads = std::min(static_cast<unsigned>(Threads), static_cast<unsigned>(inputFiles.size())); 
598    std::vector<pthread_t> threads(numOfThreads);
599
600    for(unsigned long i = 1; i < numOfThreads; ++i) {
601        const int rc = pthread_create(&threads[i], nullptr, DoGrepThreadFunction, (void *)this);
602        if (rc) {
603            llvm::report_fatal_error("Failed to create thread: code " + std::to_string(rc));
604        }
605    }
606    // Main thread also does the work;
607
608    DoGrepThreadMethod();
609    for(unsigned i = 1; i < numOfThreads; ++i) {
610        void * status = nullptr;
611        const int rc = pthread_join(threads[i], &status);
612        if (rc) {
613            llvm::report_fatal_error("Failed to join thread: code " + std::to_string(rc));
614        }
615    }
616    return grepMatchFound;
617}
618
619
620// DoGrep thread function.
621void * GrepEngine::DoGrepThreadMethod() {
622
623    unsigned fileIdx = mNextFileToGrep++;
624    while (fileIdx < inputFiles.size()) {
625        const auto grepResult = doGrep(inputFiles[fileIdx], fileIdx);
626        mFileStatus[fileIdx] = FileStatus::GrepComplete;
627        if (grepResult > 0) {
628            grepMatchFound = true;
629        }
630        if (QuietMode && grepMatchFound) {
631            if (pthread_self() != mEngineThread) {
632                pthread_exit(nullptr);
633            }
634            return nullptr;
635        }
636        fileIdx = mNextFileToGrep++;
637    }
638
639    unsigned printIdx = mNextFileToPrint++;
640    while (printIdx < inputFiles.size()) {
641        const bool readyToPrint = ((printIdx == 0) || (mFileStatus[printIdx - 1] == FileStatus::PrintComplete)) && (mFileStatus[printIdx] == FileStatus::GrepComplete);
642        if (readyToPrint) {
643            const auto output = mResultStrs[printIdx].str();
644            if (!output.empty()) {
645                llvm::outs() << output;
646            }
647            mFileStatus[printIdx] = FileStatus::PrintComplete;
648            printIdx = mNextFileToPrint++;
649        } else {
650            mGrepDriver->performIncrementalCacheCleanupStep();
651        }
652        sched_yield();
653    }
654
655    if (pthread_self() != mEngineThread) {
656        pthread_exit(nullptr);
657    } else {
658        // Always perform one final cache cleanup step.
659        mGrepDriver->performIncrementalCacheCleanupStep();
660    }
661    return nullptr;
662}
663
664}
Note: See TracBrowser for help on using the repository browser.