source: icGREP/icgrep-devel/icgrep/grep/grep_engine.cpp @ 5892

Last change on this file since 5892 was 5892, checked in by cameron, 20 months ago

Restructuring: integrating grep_engine and grep_pipeline

File size: 27.7 KB
RevLine 
[4324]1/*
[5892]2 *  Copyright (c) 2018 International Characters.
[4324]3 *  This software is licensed to the public under the Open Software License 3.0.
4 *  icgrep is a trademark of International Characters.
5 */
[5881]6#include <set>
[5234]7#include "grep_engine.h"
[5476]8#include "grep_interface.h"
[5267]9#include <llvm/IR/Module.h>
[5234]10#include <boost/filesystem.hpp>
[5206]11#include <UCD/resolve_properties.h>
[5585]12#include <kernels/charclasses.h>
[5142]13#include <kernels/cc_kernel.h>
[5404]14#include <kernels/grep_kernel.h>
[5887]15#include <kernels/UCD_property_kernel.h>
[5881]16#include <kernels/grapheme_kernel.h>
[5357]17#include <kernels/linebreak_kernel.h>
[5338]18#include <kernels/streams_merge.h>
[5429]19#include <kernels/source_kernel.h>
[5234]20#include <kernels/s2p_kernel.h>
21#include <kernels/scanmatchgen.h>
22#include <kernels/streamset.h>
[5450]23#include <kernels/until_n.h>
[5436]24#include <kernels/kernel_builder.h>
[5087]25#include <pablo/pablo_kernel.h>
[5234]26#include <re/re_cc.h>
[5881]27#include <re/re_name.h>
[5769]28#include <re/casing.h>
[5779]29#include <re/exclude_CC.h>
[5234]30#include <re/re_toolchain.h>
[5425]31#include <toolchain/toolchain.h>
[5770]32#include <re/re_name_resolve.h>
[5887]33#include <re/re_name_gather.h>
[5585]34#include <re/re_collect_unicodesets.h>
35#include <re/re_multiplex.h>
[5772]36#include <re/grapheme_clusters.h>
[5801]37#include <re/printer_re.h>
[5700]38#include <toolchain/toolchain.h>
[5464]39#include <toolchain/cpudriver.h>
[5234]40#include <iostream>
[5369]41#include <cc/multiplex_CCs.h>
[5377]42#include <llvm/Support/raw_ostream.h>
[5418]43#include <util/aligned_allocator.h>
[5386]44#include <sys/stat.h>
[5418]45#include <fcntl.h>
[5484]46#include <errno.h>
[5696]47#include <llvm/ADT/STLExtras.h> // for make_unique
[5700]48#include <llvm/Support/CommandLine.h>
[5735]49#include <llvm/Support/Debug.h>
[5762]50#include <sched.h>
[5377]51
[5241]52using namespace parabix;
[5267]53using namespace llvm;
[5795]54using namespace cc;
[5861]55using namespace kernel;
[5795]56
[5703]57static cl::opt<int> Threads("t", cl::desc("Total number of threads."), cl::init(2));
[5837]58static cl::opt<bool> PabloTransposition("enable-pablo-s2p", cl::desc("Enable experimental pablo transposition."));
[5881]59static cl::opt<bool> CC_Multiplexing("CC-multiplexing", cl::desc("Enable CC multiplexing."), cl::init(false));
[5887]60static cl::opt<bool> PropertyKernels("enable-property-kernels", cl::desc("Enable Unicode property kernels."), cl::init(false));
[5241]61
[5892]62
[5473]63namespace grep {
[5892]64   
[5473]65
[5892]66void accumulate_match_wrapper(intptr_t accum_addr, const size_t lineNum, char * line_start, char * line_end) {
67    reinterpret_cast<MatchAccumulator *>(accum_addr)->accumulate_match(lineNum, line_start, line_end);
68}
69
70void finalize_match_wrapper(intptr_t accum_addr, char * buffer_end) {
71    reinterpret_cast<MatchAccumulator *>(accum_addr)->finalize_match(buffer_end);
72}
73
74void grepBuffer(re::RE * pattern, const char * search_buffer, size_t bufferLength, MatchAccumulator * accum) {
75    const unsigned segmentSize = codegen::BufferSegments * codegen::SegmentSize * codegen::ThreadNum;
76   
77    pattern = resolveCaseInsensitiveMode(pattern, false);
78    pattern = regular_expression_passes(pattern);
79   
80   
81    ParabixDriver pxDriver("codepointEngine");
82    auto & idb = pxDriver.getBuilder();
83    Module * M = idb->getModule();
84   
85    Function * mainFunc = cast<Function>(M->getOrInsertFunction("Main", idb->getVoidTy(), idb->getInt8PtrTy(), idb->getSizeTy(), nullptr));
86    mainFunc->setCallingConv(CallingConv::C);
87    auto args = mainFunc->arg_begin();
88    Value * const buffer = &*(args++);
89    buffer->setName("buffer");
90    Value * length = &*(args++);
91    length->setName("length");
92   
93    idb->SetInsertPoint(BasicBlock::Create(M->getContext(), "entry", mainFunc, 0));
94   
95    StreamSetBuffer * ByteStream = pxDriver.addBuffer<SourceBuffer>(idb, idb->getStreamSetTy(1, 8));
96    kernel::Kernel * sourceK = pxDriver.addKernelInstance<kernel::MemorySourceKernel>(idb, idb->getInt8PtrTy());
97    sourceK->setInitialArguments({buffer, length});
98    pxDriver.makeKernelCall(sourceK, {}, {ByteStream});
99   
100   
101    StreamSetBuffer * BasisBits = pxDriver.addBuffer<CircularBuffer>(idb, idb->getStreamSetTy(8, 1), segmentSize);
102    kernel::Kernel * s2pk = pxDriver.addKernelInstance<kernel::S2PKernel>(idb);
103    pxDriver.makeKernelCall(s2pk, {ByteStream}, {BasisBits});
104   
105    StreamSetBuffer * LineFeedStream = pxDriver.addBuffer<CircularBuffer>(idb, idb->getStreamSetTy(1, 1), segmentSize);
106    kernel::Kernel * linefeedK = pxDriver.addKernelInstance<kernel::LineFeedKernelBuilder>(idb, Binding{idb->getStreamSetTy(8), "basis", FixedRate(), Principal()});
107    pxDriver.makeKernelCall(linefeedK, {BasisBits}, {LineFeedStream});
108   
109    StreamSetBuffer * LineBreakStream = pxDriver.addBuffer<CircularBuffer>(idb, idb->getStreamSetTy(1, 1), segmentSize);
110   
111    kernel::Kernel * requiredStreamsK = pxDriver.addKernelInstance<kernel::RequiredStreams_UTF8>(idb);
112    StreamSetBuffer * RequiredStreams = pxDriver.addBuffer<CircularBuffer>(idb, idb->getStreamSetTy(1, 1), segmentSize);
113    pxDriver.makeKernelCall(requiredStreamsK, {BasisBits, LineFeedStream}, {RequiredStreams, LineBreakStream});
114   
115    StreamSetBuffer * MatchResults = pxDriver.addBuffer<CircularBuffer>(idb, idb->getStreamSetTy(1, 1), segmentSize);
116    kernel::Kernel * icgrepK = pxDriver.addKernelInstance<kernel::ICGrepKernel>(idb, pattern, std::vector<std::string>{"UTF8_LB", "UTF8_nonfinal"});
117    pxDriver.makeKernelCall(icgrepK, {BasisBits, LineBreakStream, RequiredStreams}, {MatchResults});
118   
119    StreamSetBuffer * MatchedLines = pxDriver.addBuffer<CircularBuffer>(idb, idb->getStreamSetTy(1, 1), segmentSize);
120    kernel::Kernel * matchedLinesK = pxDriver.addKernelInstance<kernel::MatchedLinesKernel>(idb);
121    pxDriver.makeKernelCall(matchedLinesK, {MatchResults, LineBreakStream}, {MatchedLines});
122   
123    kernel::Kernel * scanMatchK = pxDriver.addKernelInstance<kernel::ScanMatchKernel>(idb);
124    scanMatchK->setInitialArguments({ConstantInt::get(idb->getIntAddrTy(), reinterpret_cast<intptr_t>(accum))});
125    pxDriver.makeKernelCall(scanMatchK, {MatchedLines, LineBreakStream, ByteStream}, {});
126    pxDriver.LinkFunction(*scanMatchK, "accumulate_match_wrapper", &accumulate_match_wrapper);
127    pxDriver.LinkFunction(*scanMatchK, "finalize_match_wrapper", &finalize_match_wrapper);
128   
129    pxDriver.generatePipelineIR();
130    pxDriver.deallocateBuffers();
131    idb->CreateRetVoid();
132    pxDriver.finalizeObject();
133   
134    typedef void (*GrepFunctionType)(const char * buffer, const size_t length);
135    auto f = reinterpret_cast<GrepFunctionType>(pxDriver.getMain());
136    f(search_buffer, bufferLength);
137}
138
139
140
[5704]141// Grep Engine construction and initialization.
[5770]142
[5704]143GrepEngine::GrepEngine() :
144    mGrepDriver(nullptr),
[5735]145    mNextFileToGrep(0),
146    mNextFileToPrint(0),
[5704]147    grepMatchFound(false),
[5735]148    mMoveMatchesToEOL(true),
149    mEngineThread(pthread_self()) {}
[5770]150
[5704]151GrepEngine::~GrepEngine() {
152    delete mGrepDriver;
153}
[5770]154
[5704]155QuietModeEngine::QuietModeEngine() : GrepEngine() {
156    mMoveMatchesToEOL = false;
157}
[5473]158
[5704]159MatchOnlyEngine::MatchOnlyEngine(bool showFilesWithoutMatch) :
160    GrepEngine(), mRequiredCount(showFilesWithoutMatch) {
161    mFileSuffix = NullFlag ? std::string("\0", 1) : "\n";
162    mMoveMatchesToEOL = false;
163}
[5484]164
[5704]165CountOnlyEngine::CountOnlyEngine() : GrepEngine() {
166    mFileSuffix = ":";
167}
[5484]168
[5704]169EmitMatchesEngine::EmitMatchesEngine() : GrepEngine() {
170    mFileSuffix = InitialTabFlag ? "\t:" : ":";
171    if (LineRegexpFlag) mMoveMatchesToEOL = false;
[5484]172}
[5704]173
174void GrepEngine::initFileResult(std::vector<std::string> & filenames) {
[5732]175    const unsigned n = filenames.size();
[5704]176    mResultStrs.resize(n);
[5771]177    mFileStatus.resize(n, FileStatus::Pending);
[5704]178    inputFiles = filenames;
179}
180
181// Code Generation
182//
183// All engines share a common pipeline to compute a stream of Matches from a given input Bytestream.
184
[5831]185unsigned LLVM_READNONE calculateMaxCountRate(const std::unique_ptr<kernel::KernelBuilder> & b) {
186    const unsigned packSize = b->getSizeTy()->getBitWidth();
187    return (packSize * packSize) / b->getBitBlockWidth();
188}
189
[5704]190std::pair<StreamSetBuffer *, StreamSetBuffer *> GrepEngine::grepPipeline(std::vector<re::RE *> & REs, StreamSetBuffer * ByteStream) {
191    auto & idb = mGrepDriver->getBuilder();
192    const unsigned segmentSize = codegen::SegmentSize;
193    const unsigned bufferSegments = codegen::BufferSegments * codegen::ThreadNum;
[5831]194    // TODO: until we automate stream buffer sizing, use this calculation to determine how large our matches buffer needs to be.
195    const unsigned baseBufferSize = segmentSize * (MaxCountFlag > 0 ? (std::max(bufferSegments, calculateMaxCountRate(idb))) : bufferSegments);
[5704]196    const unsigned encodingBits = 8;
[5887]197   
198   
199    //  Regular Expression Processing and Analysis Phase
200    const auto nREs = REs.size();
201    bool hasGCB[nREs];
202    bool anyGCB = false;
203   
204    std::set<re::Name *> UnicodeProperties;
205   
206    for(unsigned i = 0; i < nREs; ++i) {
207        REs[i] = resolveModesAndExternalSymbols(REs[i]);
208        REs[i] = excludeUnicodeLineBreak(REs[i]);
209        re::gatherUnicodeProperties(REs[i], UnicodeProperties);
210       //re::Name * unicodeLB = re::makeName("UTF8_LB", re::Name::Type::Unicode);
211        //unicodeLB->setDefinition(re::makeCC(0x0A));
212        //REs[i] = resolveAnchors(REs[i], unicodeLB);
213        REs[i] = regular_expression_passes(REs[i]);
214        hasGCB[i] = hasGraphemeClusterBoundary(REs[i]);
215        anyGCB |= hasGCB[i];
216    }
217   
[5831]218    StreamSetBuffer * LineFeedStream = mGrepDriver->addBuffer<CircularBuffer>(idb, idb->getStreamSetTy(1, 1), baseBufferSize);
219
220    #ifdef USE_DIRECT_LF_BUILDER
[5861]221    kernel::Kernel * linefeedK = mGrepDriver->addKernelInstance<kernel::LineFeedKernelBuilder>(idb, Binding{idb->getStreamSetTy(1, 8), "byteStream", FixedRate(), Principal()});
[5831]222    mGrepDriver->makeKernelCall(linefeedK, {ByteStream}, {LineFeedStream});
223    #endif
224
225    StreamSetBuffer * BasisBits = mGrepDriver->addBuffer<CircularBuffer>(idb, idb->getStreamSetTy(encodingBits, 1), baseBufferSize);
[5837]226    kernel::Kernel * s2pk = nullptr;
227    if (PabloTransposition) {
228        s2pk = mGrepDriver->addKernelInstance<kernel::S2P_PabloKernel>(idb);
229    }
230    else {
231        s2pk = mGrepDriver->addKernelInstance<kernel::S2PKernel>(idb);
232    }
[5704]233    mGrepDriver->makeKernelCall(s2pk, {ByteStream}, {BasisBits});
[5770]234
[5831]235    #ifndef USE_DIRECT_LF_BUILDER
[5861]236    kernel::Kernel * linefeedK = mGrepDriver->addKernelInstance<kernel::LineFeedKernelBuilder>(idb, Binding{idb->getStreamSetTy(8), "basis", FixedRate(), Principal()});
[5782]237    mGrepDriver->makeKernelCall(linefeedK, {BasisBits}, {LineFeedStream});
[5831]238    #endif
[5782]239
[5831]240    StreamSetBuffer * LineBreakStream = mGrepDriver->addBuffer<CircularBuffer>(idb, idb->getStreamSetTy(1, 1), baseBufferSize);
[5770]241
[5755]242    kernel::Kernel * requiredStreamsK = mGrepDriver->addKernelInstance<kernel::RequiredStreams_UTF8>(idb);
[5867]243    StreamSetBuffer * RequiredStreams = mGrepDriver->addBuffer<CircularBuffer>(idb, idb->getStreamSetTy(1, 1), baseBufferSize);
244    mGrepDriver->makeKernelCall(requiredStreamsK, {BasisBits, LineFeedStream}, {RequiredStreams, LineBreakStream});
[5770]245
[5887]246   
247    std::map<std::string, StreamSetBuffer *> propertyStream;
248    if (PropertyKernels) {
249        for (auto p : UnicodeProperties) {
250            auto name = p->getFullName();
251            StreamSetBuffer * s = mGrepDriver->addBuffer<CircularBuffer>(idb, idb->getStreamSetTy(1, 1), baseBufferSize);
252            propertyStream.emplace(std::make_pair(name, s));
253            kernel::Kernel * propertyK = mGrepDriver->addKernelInstance<kernel::UnicodePropertyKernelBuilder>(idb, p);
254            mGrepDriver->makeKernelCall(propertyK, {BasisBits}, {s});
255        }
256    }
257    StreamSetBuffer * GCB_stream = nullptr;
258    if (anyGCB) {
259        GCB_stream = mGrepDriver->addBuffer<CircularBuffer>(idb, idb->getStreamSetTy(1, 1), baseBufferSize);
260        kernel::Kernel * gcbK = mGrepDriver->addKernelInstance<kernel::GraphemeClusterBreakKernel>(idb);
261        mGrepDriver->makeKernelCall(gcbK, {BasisBits, RequiredStreams}, {GCB_stream});
262    }
263
264    std::vector<StreamSetBuffer *> MatchResultsBufs(nREs);
265    for(unsigned i = 0; i < nREs; ++i) {
[5803]266        REs[i] = resolveModesAndExternalSymbols(REs[i]);
267        REs[i] = excludeUnicodeLineBreak(REs[i]);
[5881]268        //re::Name * unicodeLB = re::makeName("UTF8_LB", re::Name::Type::Unicode);
269        //unicodeLB->setDefinition(re::makeCC(0x0A));
270        //REs[i] = resolveAnchors(REs[i], unicodeLB);
[5824]271        REs[i] = regular_expression_passes(REs[i]);
[5881]272        std::vector<std::string> externalStreamNames = std::vector<std::string>{"UTF8_LB", "UTF8_nonfinal"};
273        std::vector<StreamSetBuffer *> icgrepInputSets = {BasisBits, LineBreakStream, RequiredStreams};
[5887]274        std::set<re::Name *> UnicodeProperties;
275        if (PropertyKernels) {
276            re::gatherUnicodeProperties(REs[i], UnicodeProperties);
277            for (auto p : UnicodeProperties) {
278                auto name = p->getFullName();
279                auto f = propertyStream.find(name);
280                if (f == propertyStream.end()) report_fatal_error(name + " not found\n");
281                externalStreamNames.push_back(name);
282                icgrepInputSets.push_back(f->second);
283            }
284        }
285        if (hasGCB[i]) {
[5881]286            externalStreamNames.push_back("\\b{g}");
287            icgrepInputSets.push_back(GCB_stream);
288        }
[5841]289        if (CC_Multiplexing) {
[5881]290            const auto UnicodeSets = re::collectUnicodeSets(REs[i], std::set<re::Name *>({re::makeZeroWidth("\\b{g}")}));
[5841]291            StreamSetBuffer * const MatchResults = mGrepDriver->addBuffer<CircularBuffer>(idb, idb->getStreamSetTy(1, 1), baseBufferSize);
292            if (UnicodeSets.size() <= 1) {
[5881]293                kernel::Kernel * icgrepK = mGrepDriver->addKernelInstance<kernel::ICGrepKernel>(idb, REs[i], externalStreamNames);
294                mGrepDriver->makeKernelCall(icgrepK, icgrepInputSets, {MatchResults});
[5841]295                MatchResultsBufs[i] = MatchResults;
296            } else {
297                mpx = make_unique<MultiplexedAlphabet>("mpx", UnicodeSets);
298                REs[i] = transformCCs(mpx.get(), REs[i]);
299                std::vector<re::CC *> mpx_basis = mpx->getMultiplexedCCs();
300                auto numOfCharacterClasses = mpx_basis.size();
301                StreamSetBuffer * CharClasses = mGrepDriver->addBuffer<CircularBuffer>(idb, idb->getStreamSetTy(numOfCharacterClasses), baseBufferSize);
[5860]302                kernel::Kernel * ccK = mGrepDriver->addKernelInstance<kernel::CharClassesKernel>(idb, std::move(mpx_basis));
303                mGrepDriver->makeKernelCall(ccK, {BasisBits}, {CharClasses});
[5856]304//                kernel::Kernel * ccK = mGrepDriver->addKernelInstance<kernel::CharClassesKernel>(idb, std::move(mpx_basis), true);
305//                mGrepDriver->makeKernelCall(ccK, {ByteStream}, {CharClasses});
[5881]306                kernel::Kernel * icgrepK = mGrepDriver->addKernelInstance<kernel::ICGrepKernel>(idb, REs[i], externalStreamNames, std::vector<cc::Alphabet *>{mpx.get()});
307                icgrepInputSets.push_back(CharClasses);
308                mGrepDriver->makeKernelCall(icgrepK, icgrepInputSets, {MatchResults});
[5841]309                MatchResultsBufs[i] = MatchResults;
310            }
311        } else {
312            StreamSetBuffer * MatchResults = mGrepDriver->addBuffer<CircularBuffer>(idb, idb->getStreamSetTy(1, 1), baseBufferSize);
[5881]313            kernel::Kernel * icgrepK = mGrepDriver->addKernelInstance<kernel::ICGrepKernel>(idb, REs[i], externalStreamNames);
314            mGrepDriver->makeKernelCall(icgrepK, icgrepInputSets, {MatchResults});
[5816]315            MatchResultsBufs[i] = MatchResults;
316        }
[5704]317    }
318    StreamSetBuffer * MergedResults = MatchResultsBufs[0];
319    if (REs.size() > 1) {
[5831]320        MergedResults = mGrepDriver->addBuffer<CircularBuffer>(idb, idb->getStreamSetTy(1, 1), baseBufferSize);
[5755]321        kernel::Kernel * streamsMergeK = mGrepDriver->addKernelInstance<kernel::StreamsMerge>(idb, 1, REs.size());
[5704]322        mGrepDriver->makeKernelCall(streamsMergeK, MatchResultsBufs, {MergedResults});
323    }
324    StreamSetBuffer * Matches = MergedResults;
[5770]325
[5704]326    if (mMoveMatchesToEOL) {
327        StreamSetBuffer * OriginalMatches = Matches;
[5755]328        kernel::Kernel * matchedLinesK = mGrepDriver->addKernelInstance<kernel::MatchedLinesKernel>(idb);
[5831]329        Matches = mGrepDriver->addBuffer<CircularBuffer>(idb, idb->getStreamSetTy(1, 1), baseBufferSize);
[5704]330        mGrepDriver->makeKernelCall(matchedLinesK, {OriginalMatches, LineBreakStream}, {Matches});
331    }
[5770]332
[5704]333    if (InvertMatchFlag) {
[5755]334        kernel::Kernel * invertK = mGrepDriver->addKernelInstance<kernel::InvertMatchesKernel>(idb);
[5704]335        StreamSetBuffer * OriginalMatches = Matches;
[5831]336        Matches = mGrepDriver->addBuffer<CircularBuffer>(idb, idb->getStreamSetTy(1, 1), baseBufferSize);
[5704]337        mGrepDriver->makeKernelCall(invertK, {OriginalMatches, LineBreakStream}, {Matches});
338    }
339    if (MaxCountFlag > 0) {
[5755]340        kernel::Kernel * untilK = mGrepDriver->addKernelInstance<kernel::UntilNkernel>(idb);
[5704]341        untilK->setInitialArguments({idb->getSize(MaxCountFlag)});
[5831]342        StreamSetBuffer * const AllMatches = Matches;
343        Matches = mGrepDriver->addBuffer<CircularBuffer>(idb, idb->getStreamSetTy(1, 1), baseBufferSize);
[5704]344        mGrepDriver->makeKernelCall(untilK, {AllMatches}, {Matches});
345    }
346    return std::pair<StreamSetBuffer *, StreamSetBuffer *>(LineBreakStream, Matches);
[5700]347}
[5704]348
349// The QuietMode, MatchOnly and CountOnly engines share a common code generation main function,
350// which returns a count of the matches found (possibly subject to a MaxCount).
[5700]351//
352
[5704]353void GrepEngine::grepCodeGen(std::vector<re::RE *> REs) {
[5770]354
[5704]355    assert (mGrepDriver == nullptr);
356    mGrepDriver = new ParabixDriver("engine");
357    auto & idb = mGrepDriver->getBuilder();
358    Module * M = idb->getModule();
[5770]359
[5704]360    const unsigned encodingBits = 8;
[5770]361
[5704]362    Function * mainFunc = cast<Function>(M->getOrInsertFunction("Main", idb->getInt64Ty(), idb->getInt32Ty(), nullptr));
363    mainFunc->setCallingConv(CallingConv::C);
364    idb->SetInsertPoint(BasicBlock::Create(M->getContext(), "entry", mainFunc, 0));
365    auto args = mainFunc->arg_begin();
[5770]366
[5704]367    Value * const fileDescriptor = &*(args++);
368    fileDescriptor->setName("fileDescriptor");
[5770]369
[5755]370    StreamSetBuffer * ByteStream = mGrepDriver->addBuffer<SourceBuffer>(idb, idb->getStreamSetTy(1, encodingBits));
[5856]371    kernel::Kernel * sourceK = mGrepDriver->addKernelInstance<kernel::FDSourceKernel>(idb);
[5704]372    sourceK->setInitialArguments({fileDescriptor});
373    mGrepDriver->makeKernelCall(sourceK, {}, {ByteStream});
[5770]374
[5704]375    StreamSetBuffer * LineBreakStream;
376    StreamSetBuffer * Matches;
377    std::tie(LineBreakStream, Matches) = grepPipeline(REs, ByteStream);
[5770]378
[5755]379    kernel::Kernel * matchCountK = mGrepDriver->addKernelInstance<kernel::PopcountKernel>(idb);
[5704]380    mGrepDriver->makeKernelCall(matchCountK, {Matches}, {});
381    mGrepDriver->generatePipelineIR();
382    idb->setKernel(matchCountK);
383    Value * matchedLineCount = idb->getAccumulator("countResult");
384    matchedLineCount = idb->CreateZExt(matchedLineCount, idb->getInt64Ty());
385    mGrepDriver->deallocateBuffers();
386    idb->CreateRet(matchedLineCount);
387    mGrepDriver->finalizeObject();
388}
389
390//
391// The EmitMatches engine uses an EmitMatchesAccumulator object to concatenate together
392// matched lines.
393
[5700]394class EmitMatch : public MatchAccumulator {
[5703]395    friend class EmitMatchesEngine;
[5700]396public:
[5771]397    EmitMatch(std::string linePrefix, std::ostringstream & strm) : mLinePrefix(linePrefix), mLineCount(0), mTerminated(true), mResultStr(strm) {}
[5700]398    void accumulate_match(const size_t lineNum, char * line_start, char * line_end) override;
399    void finalize_match(char * buffer_end) override;
[5703]400protected:
[5700]401    std::string mLinePrefix;
402    size_t mLineCount;
[5758]403    bool mTerminated;
[5771]404    std::ostringstream & mResultStr;
[5700]405};
406
[5704]407//
408//  Default Report Match:  lines are emitted with whatever line terminators are found in the
409//  input.  However, if the final line is not terminated, a new line is appended.
[5726]410//
[5700]411void EmitMatch::accumulate_match (const size_t lineNum, char * line_start, char * line_end) {
[5758]412    if (WithFilenameFlag) {
[5771]413        mResultStr << mLinePrefix;
[5700]414    }
[5758]415    if (LineNumberFlag) {
416        // Internally line numbers are counted from 0.  For display, adjust
417        // the line number so that lines are numbered from 1.
418        if (InitialTabFlag) {
[5771]419            mResultStr << lineNum+1 << "\t:";
[5695]420        }
[5758]421        else {
[5771]422            mResultStr << lineNum+1 << ":";
[5700]423        }
[5695]424    }
[5758]425    size_t bytes = line_end - line_start + 1;
[5771]426    mResultStr.write(line_start, bytes);
[5700]427    mLineCount++;
[5758]428    unsigned last_byte = *line_end;
429    mTerminated = (last_byte >= 0x0A) && (last_byte <= 0x0D);
430    if (LLVM_UNLIKELY(!mTerminated)) {
431        if (last_byte == 0x85) {  //  Possible NEL terminator.
432            mTerminated = (bytes >= 2) && (static_cast<unsigned>(line_end[-1]) == 0xC2);
[5726]433        }
434        else {
[5758]435            // Possible LS or PS terminators.
436            mTerminated = (bytes >= 3) && (static_cast<unsigned>(line_end[-2]) == 0xE2)
437                                       && (static_cast<unsigned>(line_end[-1]) == 0x80)
438                                       && ((last_byte == 0xA8) || (last_byte == 0xA9));
[5726]439        }
[5700]440    }
441}
442
[5758]443void EmitMatch::finalize_match(char * buffer_end) {
[5771]444    if (!mTerminated) mResultStr << "\n";
[5758]445}
446
[5704]447void EmitMatchesEngine::grepCodeGen(std::vector<re::RE *> REs) {
448    assert (mGrepDriver == nullptr);
449    mGrepDriver = new ParabixDriver("engine");
450    auto & idb = mGrepDriver->getBuilder();
451    Module * M = idb->getModule();
[5770]452
[5704]453    const unsigned encodingBits = 8;
[5770]454
[5704]455    Function * mainFunc = cast<Function>(M->getOrInsertFunction("Main", idb->getInt64Ty(), idb->getInt32Ty(), idb->getIntAddrTy(), nullptr));
456    mainFunc->setCallingConv(CallingConv::C);
457    idb->SetInsertPoint(BasicBlock::Create(M->getContext(), "entry", mainFunc, 0));
458    auto args = mainFunc->arg_begin();
[5770]459
[5704]460    Value * const fileDescriptor = &*(args++);
461    fileDescriptor->setName("fileDescriptor");
462    Value * match_accumulator = &*(args++);
463    match_accumulator->setName("match_accumulator");
[5770]464
[5755]465    StreamSetBuffer * ByteStream = mGrepDriver->addBuffer<SourceBuffer>(idb, idb->getStreamSetTy(1, encodingBits));
[5856]466    kernel::Kernel * sourceK = mGrepDriver->addKernelInstance<kernel::FDSourceKernel>(idb);
[5704]467    sourceK->setInitialArguments({fileDescriptor});
468    mGrepDriver->makeKernelCall(sourceK, {}, {ByteStream});
[5770]469
[5704]470    StreamSetBuffer * LineBreakStream;
471    StreamSetBuffer * Matches;
472    std::tie(LineBreakStream, Matches) = grepPipeline(REs, ByteStream);
[5770]473
[5755]474    kernel::Kernel * scanMatchK = mGrepDriver->addKernelInstance<kernel::ScanMatchKernel>(idb);
[5704]475    scanMatchK->setInitialArguments({match_accumulator});
476    mGrepDriver->makeKernelCall(scanMatchK, {Matches, LineBreakStream, ByteStream}, {});
477    mGrepDriver->LinkFunction(*scanMatchK, "accumulate_match_wrapper", &accumulate_match_wrapper);
478    mGrepDriver->LinkFunction(*scanMatchK, "finalize_match_wrapper", &finalize_match_wrapper);
[5770]479
[5704]480    mGrepDriver->generatePipelineIR();
481    mGrepDriver->deallocateBuffers();
482    idb->CreateRet(idb->getInt64(0));
483    mGrepDriver->finalizeObject();
484}
[5700]485
486
[5704]487//
488//  The doGrep methods apply a GrepEngine to a single file, processing the results
489//  differently based on the engine type.
[5770]490
[5704]491uint64_t GrepEngine::doGrep(const std::string & fileName, const uint32_t fileIdx) {
492    typedef uint64_t (*GrepFunctionType)(int32_t fileDescriptor);
493    auto f = reinterpret_cast<GrepFunctionType>(mGrepDriver->getMain());
[5770]494
[5771]495    int32_t fileDescriptor = openFile(fileName, mResultStrs[fileIdx]);
[5704]496    if (fileDescriptor == -1) return 0;
[5770]497
[5704]498    uint64_t grepResult = f(fileDescriptor);
499    close(fileDescriptor);
500    return grepResult;
501}
502
503uint64_t CountOnlyEngine::doGrep(const std::string & fileName, const uint32_t fileIdx) {
504    uint64_t grepResult = GrepEngine::doGrep(fileName, fileIdx);
[5771]505    if (WithFilenameFlag) mResultStrs[fileIdx] << linePrefix(fileName);
506    mResultStrs[fileIdx] << grepResult << "\n";
[5704]507    return grepResult;
508}
509
510std::string GrepEngine::linePrefix(std::string fileName) {
511    if (fileName == "-") {
512        return LabelFlag + mFileSuffix;
[5548]513    }
[5704]514    else {
515        return fileName + mFileSuffix;
[5548]516    }
517}
[5770]518
[5704]519uint64_t MatchOnlyEngine::doGrep(const std::string & fileName, const uint32_t fileIdx) {
520    uint64_t grepResult = GrepEngine::doGrep(fileName, fileIdx);
521    if (grepResult == mRequiredCount) {
[5771]522       mResultStrs[fileIdx] << linePrefix(fileName);
[5704]523    }
524    return grepResult;
525}
[5700]526
[5704]527uint64_t EmitMatchesEngine::doGrep(const std::string & fileName, const uint32_t fileIdx) {
528    typedef uint64_t (*GrepFunctionType)(int32_t fileDescriptor, intptr_t accum_addr);
529    auto f = reinterpret_cast<GrepFunctionType>(mGrepDriver->getMain());
[5770]530
[5771]531    int32_t fileDescriptor = openFile(fileName, mResultStrs[fileIdx]);
[5704]532    if (fileDescriptor == -1) return 0;
[5771]533    EmitMatch accum(linePrefix(fileName), mResultStrs[fileIdx]);
[5740]534    f(fileDescriptor, reinterpret_cast<intptr_t>(&accum));
[5704]535    close(fileDescriptor);
536    if (accum.mLineCount > 0) grepMatchFound = true;
537    return accum.mLineCount;
538}
539
[5703]540// Open a file and return its file desciptor.
[5771]541int32_t GrepEngine::openFile(const std::string & fileName, std::ostringstream & msgstrm) {
[5693]542    if (fileName == "-") {
[5700]543        return STDIN_FILENO;
[5693]544    }
[5700]545    else {
546        struct stat sb;
547        int32_t fileDescriptor = open(fileName.c_str(), O_RDONLY);
548        if (LLVM_UNLIKELY(fileDescriptor == -1)) {
549            if (!NoMessagesFlag) {
550                if (errno == EACCES) {
[5771]551                    msgstrm << "icgrep: " << fileName << ": Permission denied.\n";
[5700]552                }
553                else if (errno == ENOENT) {
[5771]554                    msgstrm << "icgrep: " << fileName << ": No such file.\n";
[5700]555                }
556                else {
[5771]557                    msgstrm << "icgrep: " << fileName << ": Failed.\n";
[5700]558                }
[5484]559            }
[5700]560            return fileDescriptor;
561        }
562        if (stat(fileName.c_str(), &sb) == 0 && S_ISDIR(sb.st_mode)) {
563            if (!NoMessagesFlag) {
[5771]564                msgstrm << "icgrep: " << fileName << ": Is a directory.\n";
[5484]565            }
[5700]566            close(fileDescriptor);
[5704]567            return -1;
[5484]568        }
[5700]569        return fileDescriptor;
[4788]570    }
[5700]571}
572
[5704]573// The process of searching a group of files may use a sequential or a task
574// parallel approach.
[5770]575
[5735]576void * DoGrepThreadFunction(void *args) {
[5740]577    return reinterpret_cast<GrepEngine *>(args)->DoGrepThreadMethod();
[5735]578}
[4949]579
[5704]580bool GrepEngine::searchAllFiles() {
[5795]581    const unsigned numOfThreads = std::min(static_cast<unsigned>(Threads), static_cast<unsigned>(inputFiles.size())); 
582    std::vector<pthread_t> threads(numOfThreads);
[5770]583
[5735]584    for(unsigned long i = 1; i < numOfThreads; ++i) {
585        const int rc = pthread_create(&threads[i], nullptr, DoGrepThreadFunction, (void *)this);
586        if (rc) {
587            llvm::report_fatal_error("Failed to create thread: code " + std::to_string(rc));
[5484]588        }
[5735]589    }
590    // Main thread also does the work;
[5770]591
[5735]592    DoGrepThreadMethod();
593    for(unsigned i = 1; i < numOfThreads; ++i) {
594        void * status = nullptr;
595        const int rc = pthread_join(threads[i], &status);
596        if (rc) {
597            llvm::report_fatal_error("Failed to join thread: code " + std::to_string(rc));
[5704]598        }
[5484]599    }
[5704]600    return grepMatchFound;
[5377]601}
[5314]602
[5338]603
[5704]604// DoGrep thread function.
[5735]605void * GrepEngine::DoGrepThreadMethod() {
[5748]606
[5771]607    unsigned fileIdx = mNextFileToGrep++;
[5735]608    while (fileIdx < inputFiles.size()) {
[5771]609        const auto grepResult = doGrep(inputFiles[fileIdx], fileIdx);
[5735]610        mFileStatus[fileIdx] = FileStatus::GrepComplete;
[5761]611        if (grepResult > 0) {
612            grepMatchFound = true;
[5735]613        }
614        if (QuietMode && grepMatchFound) {
[5761]615            if (pthread_self() != mEngineThread) {
616                pthread_exit(nullptr);
617            }
[5735]618            return nullptr;
619        }
[5761]620        fileIdx = mNextFileToGrep++;
[5574]621    }
[5740]622
[5771]623    unsigned printIdx = mNextFileToPrint++;
[5761]624    while (printIdx < inputFiles.size()) {
625        const bool readyToPrint = ((printIdx == 0) || (mFileStatus[printIdx - 1] == FileStatus::PrintComplete)) && (mFileStatus[printIdx] == FileStatus::GrepComplete);
[5735]626        if (readyToPrint) {
[5771]627            const auto output = mResultStrs[printIdx].str();
[5761]628            if (!output.empty()) {
[5771]629                llvm::outs() << output;
[5761]630            }
631            mFileStatus[printIdx] = FileStatus::PrintComplete;
632            printIdx = mNextFileToPrint++;
633        } else {
[5735]634            mGrepDriver->performIncrementalCacheCleanupStep();
635        }
[5762]636        sched_yield();
[5735]637    }
[5761]638
[5735]639    if (pthread_self() != mEngineThread) {
640        pthread_exit(nullptr);
[5761]641    } else {
[5795]642        // Always perform one final cache cleanup step.
643        mGrepDriver->performIncrementalCacheCleanupStep();
[5735]644    }
[5812]645    return nullptr;
[5703]646}
[5740]647
[5481]648}
Note: See TracBrowser for help on using the repository browser.