source: icGREP/icgrep-devel/icgrep/grep_engine.cpp @ 5697

Last change on this file since 5697 was 5697, checked in by cameron, 2 years ago

Some tidying up

File size: 15.0 KB
Line 
1/*
2 *  Copyright (c) 2017 International Characters.
3 *  This software is licensed to the public under the Open Software License 3.0.
4 *  icgrep is a trademark of International Characters.
5 */
6
7#include "grep_engine.h"
8#include "grep_interface.h"
9#include <llvm/IR/Module.h>
10#include <boost/filesystem.hpp>
11#include <UCD/resolve_properties.h>
12#include <kernels/charclasses.h>
13#include <kernels/cc_kernel.h>
14#include <kernels/grep_kernel.h>
15#include <kernels/linebreak_kernel.h>
16#include <kernels/streams_merge.h>
17#include <kernels/source_kernel.h>
18#include <kernels/s2p_kernel.h>
19#include <kernels/scanmatchgen.h>
20#include <kernels/streamset.h>
21#include <kernels/until_n.h>
22#include <kernels/kernel_builder.h>
23#include <pablo/pablo_kernel.h>
24#include <re/re_cc.h>
25#include <re/re_toolchain.h>
26#include <toolchain/toolchain.h>
27#include <re/re_name_resolve.h>   
28#include <re/re_collect_unicodesets.h>
29#include <re/re_multiplex.h>
30#include <toolchain/cpudriver.h>
31#include <toolchain/NVPTXDriver.h>
32#include <iostream>
33#include <cc/multiplex_CCs.h>
34#include <llvm/Support/raw_ostream.h>
35#include <util/aligned_allocator.h>
36#include <sys/stat.h>
37#include <fcntl.h>
38#include <errno.h>
39#include <llvm/ADT/STLExtras.h> // for make_unique
40
41using namespace parabix;
42using namespace llvm;
43
44namespace grep {
45
46
47// DoGrep thread function.
48void *DoGrepThreadFunction(void *args)
49{
50    size_t fileIdx;
51    grep::GrepEngine * grepEngine = (grep::GrepEngine *)args;
52
53    grepEngine->count_mutex.lock();
54    fileIdx = grepEngine->fileCount;
55    grepEngine->fileCount++;
56    grepEngine->count_mutex.unlock();
57
58    while (fileIdx < grepEngine->inputFiles.size()) {
59        size_t grepResult = grepEngine->doGrep(grepEngine->inputFiles[fileIdx], fileIdx);
60       
61        grepEngine->count_mutex.lock();
62        if (grepResult > 0) grepEngine->grepMatchFound = true;
63        fileIdx = grepEngine->fileCount;
64        grepEngine->fileCount++;
65        grepEngine->count_mutex.unlock();
66        if (QuietMode && grepEngine->grepMatchFound) pthread_exit(nullptr);
67    }
68
69    pthread_exit(nullptr);
70}
71   
72   
73    void NonNormalizingReportMatch::accumulate_match (const size_t lineNum, char * line_start, char * line_end) {
74        if (!(WithFilenameFlag | LineNumberFlag) && (line_start == mPrevious_line_end + 1)) {
75            // Consecutive matches: only one write call needed.
76            mResultStr.write(mPrevious_line_end, line_end - mPrevious_line_end);
77        }
78        else {
79            if (mLineCount > 0) {
80                // deal with the final byte of the previous line.
81                mResultStr.write(mPrevious_line_end, 1);
82            }
83            if (WithFilenameFlag) {
84                mResultStr << mLinePrefix;
85            }
86            if (LineNumberFlag) {
87                // Internally line numbers are counted from 0.  For display, adjust
88                // the line number so that lines are numbered from 1.
89                if (InitialTabFlag) {
90                    mResultStr << lineNum+1 << "\t:";
91                }
92                else {
93                    mResultStr << lineNum+1 << ":";
94                }
95            }
96            mResultStr.write(line_start, line_end - line_start);
97        }
98        mPrevious_line_end = line_end;
99        mLineCount++;
100    }
101   
102    void NonNormalizingReportMatch::finalize_match(char * buffer_end) {
103        if (mLineCount == 0) return;  // No matches.
104        if (mPrevious_line_end < buffer_end) {
105            mResultStr.write(mPrevious_line_end, 1);
106        }
107        else {
108            // Likely unterminated final line.
109            char last_byte = mPrevious_line_end[-1];
110            if (last_byte == 0x0D) {
111                // The final CR is acceptable as a line_end.
112                return;
113            }
114            // Terminate the line with an LF
115            // (Even if we had an incomplete UTF-8 sequence.)
116            mResultStr << "\n";
117        }
118    }
119   
120   
121
122bool matchesNeedToBeMovedToEOL() {
123    if ((Mode == QuietMode) | (Mode == FilesWithMatch) | (Mode == FilesWithoutMatch)) {
124        return false;
125    }
126    else if (LineRegexpFlag) {
127        return false;
128    }
129    // TODO: return false for other cases based on regexp analysis, e.g., regexp ends with $.
130    return true;
131}
132   
// Greps a single named input and returns the value produced by the
// descriptor-based doGrep overload (0 when the input could not be searched).
//
// fileName - path to open, or "-" to search standard input.
// fileIdx  - index of this input; selects the entry of resultAccums that
//            receives any output or diagnostic text.
//
// Failures to open the file, and directories, are reported into the result
// accumulator unless NoMessagesFlag is set or the mode is QuietMode.
uint64_t GrepEngine::doGrep(const std::string & fileName, const uint32_t fileIdx) {
    if (fileName == "-") {
        return doGrep(STDIN_FILENO, fileIdx);
    }
    const int32_t fd = open(fileName.c_str(), O_RDONLY);
    if (LLVM_UNLIKELY(fd == -1)) {
        // Capture errno straight away so that nothing below can overwrite it.
        const int openErrno = errno;
        if (!NoMessagesFlag  && !(Mode == QuietMode)) {
            if (openErrno == EACCES) {
                resultAccums[fileIdx]->mResultStr << "icgrep: " << fileName << ": Permission denied.\n";
            }
            else if (openErrno == ENOENT) {
                resultAccums[fileIdx]->mResultStr << "icgrep: " << fileName << ": No such file.\n";
            }
            else {
                resultAccums[fileIdx]->mResultStr << "icgrep: " << fileName << ": Failed.\n";
            }
        }
        return 0;
    }
    // Query the descriptor that was just opened instead of looking the path
    // up a second time: this avoids a race if the path is replaced between
    // open() and the check, and saves a path resolution.
    struct stat sb;
    if (fstat(fd, &sb) == 0 && S_ISDIR(sb.st_mode)) {
        if (!NoMessagesFlag  && !(Mode == QuietMode)) {
            resultAccums[fileIdx]->mResultStr << "icgrep: " << fileName << ": Is a directory.\n";
        }
        close(fd);
        return 0;
    }
    const auto result = doGrep(fd, fileIdx);
    close(fd);
    return result;
}
164
165uint64_t GrepEngine::doGrep(const int32_t fileDescriptor, const uint32_t fileIdx) {
166    typedef uint64_t (*GrepFunctionType)(int32_t fileDescriptor, intptr_t accum_addr);
167    auto f = reinterpret_cast<GrepFunctionType>(mGrepDriver->getMain());
168   
169    uint64_t grepResult = f(fileDescriptor, reinterpret_cast<intptr_t>(resultAccums[fileIdx].get()));
170    if (grepResult > 0) grepMatchFound = true;
171    else if ((Mode == NormalMode) && !resultAccums[fileIdx]->mResultStr.str().empty()) grepMatchFound = true;
172   
173    if (Mode == CountOnly) {
174        resultAccums[fileIdx]->mResultStr << resultAccums[fileIdx]->mLinePrefix << grepResult << "\n";
175    }
176    else if (Mode == FilesWithMatch || Mode == FilesWithoutMatch ) {
177        size_t requiredCount = Mode == FilesWithMatch ? 1 : 0;
178        if (grepResult == requiredCount) {
179            resultAccums[fileIdx]->mResultStr << resultAccums[fileIdx]->mLinePrefix;
180        }
181    }
182    else if (Mode == QuietMode) {
183        if (grepMatchFound) exit(MatchFoundExitCode);
184    }
185    return grepResult;
186}
187
188void GrepEngine::initFileResult(std::vector<std::string> filenames){
189    grepMatchFound = false;
190    const int n = filenames.size();
191    if ((n > 1) && !NoFilenameFlag) {
192        WithFilenameFlag = true;
193    }
194    std::string fileSuffix = "";
195    bool setLinePrefix = WithFilenameFlag || (Mode == FilesWithMatch) || (Mode == FilesWithoutMatch);
196    if (setLinePrefix) {
197        if (NullFlag) {
198            fileSuffix = std::string("\0", 1);
199        }
200        else if ((Mode == NormalMode) && InitialTabFlag && !(LineNumberFlag || ByteOffsetFlag)) {
201            fileSuffix = "\t:";
202        }
203        else if ((Mode == NormalMode) || (Mode == CountOnly)) {
204            fileSuffix = ":";
205        }
206        else if ((Mode == FilesWithMatch) || (Mode == FilesWithoutMatch)) {
207            fileSuffix = "\n";
208        }
209    }
210    inputFiles = filenames;
211    for (unsigned i = 0; i < inputFiles.size(); ++i) {
212        std::string linePrefix;
213        if (setLinePrefix) {
214            if (inputFiles[i] == "-") {
215                linePrefix = LabelFlag + fileSuffix;
216            }
217            else {
218                linePrefix = inputFiles[i] + fileSuffix;
219            }
220        }
221        resultAccums.push_back(make_unique<NonNormalizingReportMatch>(linePrefix));
222    }
223}
224
225
226void GrepEngine::PrintResults(){
227   
228    for (unsigned i = 0; i < inputFiles.size(); ++i){
229        std::cout << resultAccums[i]->mResultStr.str();
230    }
231    exit(grepMatchFound ? MatchFoundExitCode : MatchNotFoundExitCode);
232}
233
234   
235std::pair<StreamSetBuffer *, StreamSetBuffer *> grepPipeline(Driver * grepDriver, std::vector<re::RE *> & REs, const GrepModeType grepMode, StreamSetBuffer * ByteStream) {
236    auto & idb = grepDriver->getBuilder();
237    const unsigned segmentSize = codegen::SegmentSize;
238    const unsigned bufferSegments = codegen::BufferSegments * codegen::ThreadNum;
239    const unsigned encodingBits = 8;
240
241    StreamSetBuffer * BasisBits = grepDriver->addBuffer(make_unique<CircularBuffer>(idb, idb->getStreamSetTy(encodingBits, 1), segmentSize * bufferSegments));
242    kernel::Kernel * s2pk = grepDriver->addKernelInstance(make_unique<kernel::S2PKernel>(idb));
243    grepDriver->makeKernelCall(s2pk, {ByteStream}, {BasisBits});
244   
245    StreamSetBuffer * LineBreakStream = grepDriver->addBuffer(make_unique<CircularBuffer>(idb, idb->getStreamSetTy(1, 1), segmentSize * bufferSegments));
246    kernel::Kernel * linebreakK = grepDriver->addKernelInstance(make_unique<kernel::LineBreakKernelBuilder>(idb, encodingBits));
247    grepDriver->makeKernelCall(linebreakK, {BasisBits}, {LineBreakStream});
248   
249    kernel::Kernel * requiredStreamsK = grepDriver->addKernelInstance(make_unique<kernel::RequiredStreams_UTF8>(idb));
250    StreamSetBuffer * RequiredStreams = grepDriver->addBuffer(make_unique<CircularBuffer>(idb, idb->getStreamSetTy(4, 1), segmentSize * bufferSegments));
251    grepDriver->makeKernelCall(requiredStreamsK, {BasisBits}, {RequiredStreams});
252   
253    const auto n = REs.size();
254   
255    std::vector<std::vector<UCD::UnicodeSet>> charclasses(n);
256
257    for (unsigned i = 0; i < n; i++) {
258        REs[i] = re::resolveNames(REs[i]);
259        std::vector<UCD::UnicodeSet> UnicodeSets = re::collect_UnicodeSets(REs[i]);
260        std::vector<std::vector<unsigned>> exclusiveSetIDs;
261        doMultiplexCCs(UnicodeSets, exclusiveSetIDs, charclasses[i]);
262        REs[i] = multiplex(REs[i], UnicodeSets, exclusiveSetIDs);
263    } 
264
265    std::vector<StreamSetBuffer *> MatchResultsBufs(n);
266
267    for(unsigned i = 0; i < n; ++i){
268        const auto numOfCharacterClasses = charclasses[i].size();
269        StreamSetBuffer * CharClasses = grepDriver->addBuffer(make_unique<CircularBuffer>(idb, idb->getStreamSetTy(numOfCharacterClasses), segmentSize * bufferSegments));
270        kernel::Kernel * ccK = grepDriver->addKernelInstance(make_unique<kernel::CharClassesKernel>(idb, std::move(charclasses[i])));
271        grepDriver->makeKernelCall(ccK, {BasisBits}, {CharClasses});
272        StreamSetBuffer * MatchResults = grepDriver->addBuffer(make_unique<CircularBuffer>(idb, idb->getStreamSetTy(1, 1), segmentSize * bufferSegments));
273        kernel::Kernel * icgrepK = grepDriver->addKernelInstance(make_unique<kernel::ICGrepKernel>(idb, REs[i], numOfCharacterClasses));
274        grepDriver->makeKernelCall(icgrepK, {CharClasses, LineBreakStream, RequiredStreams}, {MatchResults});
275        MatchResultsBufs[i] = MatchResults;
276    }
277    StreamSetBuffer * MergedResults = MatchResultsBufs[0];
278    if (REs.size() > 1) {
279        MergedResults = grepDriver->addBuffer(make_unique<CircularBuffer>(idb, idb->getStreamSetTy(1, 1), segmentSize * bufferSegments));
280        kernel::Kernel * streamsMergeK = grepDriver->addKernelInstance(make_unique<kernel::StreamsMerge>(idb, 1, REs.size()));
281        grepDriver->makeKernelCall(streamsMergeK, MatchResultsBufs, {MergedResults});
282    }
283    StreamSetBuffer * Matches = MergedResults;
284   
285    if (matchesNeedToBeMovedToEOL()) {
286        StreamSetBuffer * OriginalMatches = Matches;
287        kernel::Kernel * matchedLinesK = grepDriver->addKernelInstance(make_unique<kernel::MatchedLinesKernel>(idb));
288        Matches = grepDriver->addBuffer(make_unique<CircularBuffer>(idb, idb->getStreamSetTy(1, 1), segmentSize * bufferSegments));
289        grepDriver->makeKernelCall(matchedLinesK, {OriginalMatches, LineBreakStream}, {Matches});
290    }
291   
292    if (InvertMatchFlag) {
293        kernel::Kernel * invertK = grepDriver->addKernelInstance(make_unique<kernel::InvertMatchesKernel>(idb));
294        StreamSetBuffer * OriginalMatches = Matches;
295        Matches = grepDriver->addBuffer(make_unique<CircularBuffer>(idb, idb->getStreamSetTy(1, 1), segmentSize * bufferSegments));
296        grepDriver->makeKernelCall(invertK, {OriginalMatches, LineBreakStream}, {Matches});
297    }
298    if (MaxCountFlag > 0) {
299        kernel::Kernel * untilK = grepDriver->addKernelInstance(make_unique<kernel::UntilNkernel>(idb));
300        untilK->setInitialArguments({idb->getSize(MaxCountFlag)});
301        StreamSetBuffer * AllMatches = Matches;
302        Matches = grepDriver->addBuffer(make_unique<CircularBuffer>(idb, idb->getStreamSetTy(1, 1), segmentSize * bufferSegments));
303        grepDriver->makeKernelCall(untilK, {AllMatches}, {Matches});
304    }
305    return std::pair<StreamSetBuffer *, StreamSetBuffer *>(LineBreakStream, Matches);
306}
307
308void GrepEngine::grepCodeGen(std::vector<re::RE *> REs, const GrepModeType grepMode) {
309
310    assert (mGrepDriver == nullptr);
311    mGrepDriver = new ParabixDriver("engine");
312    auto & idb = mGrepDriver->getBuilder();
313    Module * M = idb->getModule();
314
315    const unsigned segmentSize = codegen::SegmentSize;
316    const unsigned encodingBits = 8;
317
318    Function * mainFunc = cast<Function>(M->getOrInsertFunction("Main", idb->getInt64Ty(), idb->getInt32Ty(), idb->getIntAddrTy(), nullptr));
319    mainFunc->setCallingConv(CallingConv::C);
320    idb->SetInsertPoint(BasicBlock::Create(M->getContext(), "entry", mainFunc, 0));
321    auto args = mainFunc->arg_begin();
322
323    Value * const fileDescriptor = &*(args++);
324    fileDescriptor->setName("fileDescriptor");
325    Value * match_accumulator = &*(args++);
326    match_accumulator->setName("match_accumulator");
327
328    StreamSetBuffer * ByteStream = mGrepDriver->addBuffer(make_unique<SourceBuffer>(idb, idb->getStreamSetTy(1, encodingBits)));
329    kernel::Kernel * sourceK = mGrepDriver->addKernelInstance(make_unique<kernel::FDSourceKernel>(idb, segmentSize));
330    sourceK->setInitialArguments({fileDescriptor});
331    mGrepDriver->makeKernelCall(sourceK, {}, {ByteStream});
332   
333    StreamSetBuffer * LineBreakStream;
334    StreamSetBuffer * Matches;
335    std::tie(LineBreakStream, Matches) = grepPipeline(mGrepDriver, REs, grepMode, ByteStream);
336   
337    if (grepMode == NormalMode) {
338        kernel::Kernel * scanMatchK = mGrepDriver->addKernelInstance(make_unique<kernel::ScanMatchKernel>(idb));
339        scanMatchK->setInitialArguments({match_accumulator});
340        mGrepDriver->makeKernelCall(scanMatchK, {Matches, LineBreakStream, ByteStream}, {});
341        mGrepDriver->LinkFunction(*scanMatchK, "accumulate_match_wrapper", &accumulate_match_wrapper);
342        mGrepDriver->LinkFunction(*scanMatchK, "finalize_match_wrapper", &finalize_match_wrapper);
343
344       
345        mGrepDriver->generatePipelineIR();
346        mGrepDriver->deallocateBuffers();
347
348        idb->CreateRet(idb->getInt64(0));
349    } else {
350        kernel::Kernel * matchCountK = mGrepDriver->addKernelInstance(make_unique<kernel::PopcountKernel>(idb));
351        mGrepDriver->makeKernelCall(matchCountK, {Matches}, {});
352        mGrepDriver->generatePipelineIR();
353        idb->setKernel(matchCountK);
354        Value * matchedLineCount = idb->getAccumulator("countResult");
355        matchedLineCount = idb->CreateZExt(matchedLineCount, idb->getInt64Ty());
356        mGrepDriver->deallocateBuffers();
357        idb->CreateRet(matchedLineCount);
358    }
359    mGrepDriver->finalizeObject();
360}
361
362
363GrepEngine::~GrepEngine() {
364    delete mGrepDriver;
365}
366
367}
Note: See TracBrowser for help on using the repository browser.