source: icGREP/icgrep-devel/icgrep/grep_engine.cpp @ 5652

Last change on this file since 5652 was 5646, checked in by nmedfort, 22 months ago

Minor clean up. Bug fix for object cache when the same cached kernel is used twice in a single run. Improvement to RE Minimizer.

File size: 29.4 KB
Line 
1/*
2 *  Copyright (c) 2017 International Characters.
3 *  This software is licensed to the public under the Open Software License 3.0.
4 *  icgrep is a trademark of International Characters.
5 */
6
7#include "grep_engine.h"
8#include "grep_interface.h"
9#include <llvm/IR/Module.h>
10#include <boost/filesystem.hpp>
11#include <UCD/UnicodeNameData.h>
12#include <UCD/resolve_properties.h>
13#include <kernels/charclasses.h>
14#include <kernels/cc_kernel.h>
15#include <kernels/grep_kernel.h>
16#include <kernels/linebreak_kernel.h>
17#include <kernels/streams_merge.h>
18#include <kernels/source_kernel.h>
19#include <kernels/s2p_kernel.h>
20#include <kernels/scanmatchgen.h>
21#include <kernels/streamset.h>
22#include <kernels/until_n.h>
23#include <kernels/kernel_builder.h>
24#include <pablo/pablo_kernel.h>
25#include <re/re_cc.h>
26#include <re/re_toolchain.h>
27#include <toolchain/toolchain.h>
28#include <re/re_name_resolve.h>   
29#include <re/re_collect_unicodesets.h>
30#include <re/re_multiplex.h>
31#include <toolchain/cpudriver.h>
32#include <toolchain/NVPTXDriver.h>
33#include <iostream>
34#include <sstream>
35#include <cc/multiplex_CCs.h>
36#include <llvm/Support/raw_ostream.h>
37#include <util/aligned_allocator.h>
38#include <sys/stat.h>
39#include <fcntl.h>
40#include <errno.h>
41#include <mutex>
42#ifdef CUDA_ENABLED
43#include <preprocess.cpp>
44#include <IR_Gen/CudaDriver.h>
45#endif
46
47using namespace parabix;
48using namespace llvm;
49
50namespace grep {
51
// Per-file output buffers, indexed by position in inputFiles; allocated by initFileResult.
static std::stringstream * resultStrs = nullptr;
// The file names handed to initFileResult, in processing order.
static std::vector<std::string> inputFiles;
// Text emitted before each reported result for a file; indexed like inputFiles.
static std::vector<std::string> linePrefix;
// True once any match has been recorded; selects the exit code in PrintResults.
static bool grepMatchFound = false;

// Buffers allocated by the CUDA variant of doGrep and passed to RunPTX.
size_t * startPoints = nullptr;
size_t * accumBytes = nullptr;


// Serialises the worker threads' access to fileCount in DoGrepThreadFunction.
std::mutex count_mutex;
// Index of the next entry of inputFiles that no worker has claimed yet.
size_t fileCount = 0;
63
64// DoGrep thread function.
65void *DoGrepThreadFunction(void *args)
66{
67    size_t fileIdx;
68    grep::GrepEngine * grepEngine = (grep::GrepEngine *)args;
69
70    count_mutex.lock();
71    fileIdx = fileCount;
72    fileCount++;
73    count_mutex.unlock();
74
75    while (fileIdx < inputFiles.size()) {
76        size_t grepResult = grepEngine->doGrep(inputFiles[fileIdx], fileIdx);
77       
78        count_mutex.lock();
79        if (grepResult > 0) grepMatchFound = true;
80        fileIdx = fileCount;
81        fileCount++;
82        count_mutex.unlock();
83        if (QuietMode && grepMatchFound) pthread_exit(nullptr);
84    }
85
86    pthread_exit(nullptr);
87}
88
89bool matchesNeedToBeMovedToEOL() {
90    if ((Mode == QuietMode) | (Mode == FilesWithMatch) | (Mode == FilesWithoutMatch)) {
91        return false;
92    }
93    else if (LineRegexpFlag) {
94        return false;
95    }
96    // TODO: return false for other cases based on regexp analysis, e.g., regexp ends with $.
97    return true;
98}
99   
100void GrepEngine::doGrep(const std::string & fileName, std::string & PTXFilename) const{
101#ifdef CUDA_ENABLED
102    const bool CountOnly = true;
103    boost::filesystem::path file(fileName);
104    if (exists(file)) {
105        if (is_directory(file)) {
106            return;
107        }
108    } else {
109        if (!NoMessagesFlag) {
110            std::cerr << "Error: cannot open " << fileName << " for processing. Skipped.\n";
111            return;
112        }
113    }
114
115    const auto fileSize = file_size(file);
116   
117    if (fileSize > 0) {
118        try {
119            boost::iostreams::mapped_file_source source(fileName, fileSize, 0);
120            char * fileBuffer = const_cast<char *>(source.data());
121           
122            codegen::BlockSize = 128;
123            std::vector<size_t> LFPositions = preprocess(fileBuffer, fileSize);
124           
125            const unsigned numOfGroups = codegen::GroupNum;
126            if (posix_memalign((void**)&startPoints, 8, (numOfGroups+1)*sizeof(size_t)) ||
127                posix_memalign((void**)&accumBytes, 8, (numOfGroups+1)*sizeof(size_t))) {
128                std::cerr << "Cannot allocate memory for startPoints or accumBytes.\n";
129                exit(-1);
130            }
131            if(PTXFilename=="")
132                PTXFilename = mGrepDriver->getBuilder()->getModule()->getModuleIdentifier() + ".ptx";
133            RunPTX(PTXFilename, fileBuffer, fileSize, CountOnly, LFPositions, startPoints, accumBytes);
134            source.close();
135        } catch (std::exception & e) {
136            if (!NoMessagesFlag) {
137                std::cerr << "Boost mmap error: " + fileName + ": " + e.what() + " Skipped.\n";
138                return;
139            }
140        }
141    } else {
142        std::cout << 0 << std::endl;
143    }
144#endif
145}
146
// Opens fileName read-only and greps it through doGrep(fd, fileIdx).
//
// fileName  path of the file to search.
// fileIdx   index of the file's slot in resultStrs / linePrefix.
// Returns the value produced by doGrep(fd, fileIdx), or 0 when the file cannot
// be opened or is a directory.  In those cases a diagnostic is appended to
// resultStrs[fileIdx] unless NoMessagesFlag is set or Mode is QuietMode.
uint64_t GrepEngine::doGrep(const std::string & fileName, const uint32_t fileIdx) const {
    const int32_t fd = open(fileName.c_str(), O_RDONLY);
    if (LLVM_UNLIKELY(fd == -1)) {
        // Capture errno immediately so that no later call can overwrite it.
        const int openError = errno;
        if (!NoMessagesFlag  && !(Mode == QuietMode)) {
            if (openError == EACCES) {
                resultStrs[fileIdx] << "icgrep: " << fileName << ": Permission denied.\n";
            }
            else if (openError == ENOENT) {
                resultStrs[fileIdx] << "icgrep: " << fileName << ": No such file.\n";
            }
            else {
                resultStrs[fileIdx] << "icgrep: " << fileName << ": Failed.\n";
            }
        }
        return 0;
    }
    // Query the descriptor that was just opened rather than the path: this
    // avoids a second path lookup and inspects exactly the object being read.
    struct stat sb;
    if (fstat(fd, &sb) == 0 && S_ISDIR(sb.st_mode)) {
        if (!NoMessagesFlag  && !(Mode == QuietMode)) {
            resultStrs[fileIdx] << "icgrep: " << fileName << ": Is a directory.\n";
        }
        close(fd);
        return 0;
    }
    const auto result = doGrep(fd, fileIdx);
    close(fd);
    return result;
}
175
176uint64_t GrepEngine::doGrep(const int32_t fileDescriptor, const uint32_t fileIdx) const {
177    assert (mGrepDriver);
178    typedef uint64_t (*GrepFunctionType)(int32_t fileDescriptor, const uint32_t fileIdx);
179    auto f = reinterpret_cast<GrepFunctionType>(mGrepDriver->getMain());
180   
181    uint64_t grepResult = f(fileDescriptor, fileIdx);
182    if (grepResult > 0) grepMatchFound = true;
183    else if ((Mode == NormalMode) && !resultStrs[fileIdx].str().empty()) grepMatchFound = true;
184   
185    if (Mode == CountOnly) {
186        resultStrs[fileIdx] << linePrefix[fileIdx] << grepResult << "\n";
187    }
188    else if (Mode == FilesWithMatch || Mode == FilesWithoutMatch ) {
189        size_t requiredCount = Mode == FilesWithMatch ? 1 : 0;
190        if (grepResult == requiredCount) {
191            resultStrs[fileIdx] << linePrefix[fileIdx];
192        }
193    }
194    else if (Mode == QuietMode) {
195        if (grepMatchFound) exit(MatchFoundExitCode);
196    }
197    return grepResult;
198}
199
200void initFileResult(std::vector<std::string> filenames){
201    grepMatchFound = false;
202    const int n = filenames.size();
203    linePrefix.resize(n);
204    if ((n > 1) && !NoFilenameFlag) {
205        WithFilenameFlag = true;
206    }
207    std::string fileSuffix = "";
208    bool setLinePrefix = WithFilenameFlag || (Mode == FilesWithMatch) || (Mode == FilesWithoutMatch);
209    if (setLinePrefix) {
210        if (NullFlag) {
211            fileSuffix = std::string("\0", 1);
212        }
213        else if ((Mode == NormalMode) && InitialTabFlag && !(LineNumberFlag || ByteOffsetFlag)) {
214            fileSuffix = "\t:";
215        }
216        else if ((Mode == NormalMode) || (Mode == CountOnly)) {
217            fileSuffix = ":";
218        }
219        else if ((Mode == FilesWithMatch) || (Mode == FilesWithoutMatch)) {
220            fileSuffix = "\n";
221        }
222    }
223    inputFiles = filenames;
224    resultStrs = new std::stringstream[n];
225    for (unsigned i = 0; i < inputFiles.size(); ++i) {
226        if (setLinePrefix) {
227            if (inputFiles[i] == "-") {
228                linePrefix[i] = LabelFlag + fileSuffix;
229            }
230            else {
231                linePrefix[i] = inputFiles[i] + fileSuffix;
232            }
233        }
234    }
235}
236
237template<typename CodeUnit>
238void wrapped_report_match(const size_t lineNum, size_t line_start, size_t line_end, const CodeUnit * const buffer, const size_t filesize, const size_t fileIdx) {
239
240//    errs().write_hex((size_t)buffer) << " : " << lineNum << " (" << line_start << ", " << line_end << ", " << filesize << ")\n";
241
242    assert (buffer);
243    assert (line_start <= line_end);
244    assert (line_end <= filesize);
245
246    if (WithFilenameFlag) {
247        resultStrs[fileIdx] << linePrefix[fileIdx];
248    }
249    if (LineNumberFlag) {
250        // Internally line numbers are counted from 0.  For display, adjust
251        // the line number so that lines are numbered from 1.
252        if (InitialTabFlag) {
253            resultStrs[fileIdx] << lineNum+1 << "\t:";
254        }
255        else {
256            resultStrs[fileIdx] << lineNum+1 << ":";
257        }
258    }
259
260    // If the line "starts" on the LF of a CRLF, it is actually the end of the last line.
261    if ((buffer[line_start] == 0xA) && (line_start != line_end)) {
262        ++line_start;
263    }
264
265    if (LLVM_UNLIKELY(line_end == filesize)) {
266        // The match position is at end-of-file.   We have a final unterminated line.
267        resultStrs[fileIdx].write((char *)&buffer[line_start], (line_end - line_start) * sizeof(CodeUnit));
268        if (NormalizeLineBreaksFlag) {
269            resultStrs[fileIdx] << '\n';  // terminate it
270        }
271    } else {
272        const auto end_byte = buffer[line_end];
273        if (grep::NormalizeLineBreaksFlag) {
274            if (LLVM_UNLIKELY(end_byte == 0x85)) {
275                // Line terminated with NEL, on the second byte.  Back up 1.
276                line_end -= 1;
277            } else if (LLVM_UNLIKELY(end_byte > 0xD)) {
278                // Line terminated with PS or LS, on the third byte.  Back up 2.
279                line_end -= 2;
280            }
281            resultStrs[fileIdx].write((char *)&buffer[line_start], (line_end - line_start) * sizeof(CodeUnit));
282            resultStrs[fileIdx] << '\n';
283        } else {
284            if (end_byte == 0x0D) {
285                // Check for line_end on first byte of CRLF; we don't want to access past the end of buffer.
286                if ((line_end + 1) < filesize) {
287                    if (buffer[line_end + 1] == 0x0A) {
288                        // Found CRLF; preserve both bytes.
289                        ++line_end;
290                    }
291                }
292            }
293            resultStrs[fileIdx].write((char *)&buffer[line_start], (line_end - line_start + 1) * sizeof(CodeUnit));
294        }
295    }
296}
297
298void PrintResults(){
299   
300    for (unsigned i = 0; i < inputFiles.size(); ++i){
301        std::cout << resultStrs[i].str();
302    }
303    exit(grepMatchFound ? MatchFoundExitCode : MatchNotFoundExitCode);
304}
305
306   
307std::pair<StreamSetBuffer *, StreamSetBuffer *> grepPipeline(Driver * grepDriver, std::vector<re::RE *> & REs, const GrepModeType grepMode, unsigned encodingBits, StreamSetBuffer * ByteStream) {
308    auto & idb = grepDriver->getBuilder();
309    const unsigned segmentSize = codegen::SegmentSize;
310    const unsigned bufferSegments = codegen::BufferSegments * codegen::ThreadNum;
311    size_t MatchLimit = ((grepMode == QuietMode) | (grepMode == FilesWithMatch) | (grepMode == FilesWithoutMatch)) ? 1 : MaxCountFlag;
312   
313
314    StreamSetBuffer * BasisBits = grepDriver->addBuffer(make_unique<CircularBuffer>(idb, idb->getStreamSetTy(encodingBits, 1), segmentSize * bufferSegments));
315    kernel::Kernel * s2pk = grepDriver->addKernelInstance(make_unique<kernel::S2PKernel>(idb));
316    grepDriver->makeKernelCall(s2pk, {ByteStream}, {BasisBits});
317   
318    StreamSetBuffer * LineBreakStream = grepDriver->addBuffer(make_unique<CircularBuffer>(idb, idb->getStreamSetTy(1, 1), segmentSize * bufferSegments));
319    kernel::Kernel * linebreakK = grepDriver->addKernelInstance(make_unique<kernel::LineBreakKernelBuilder>(idb, encodingBits));
320    grepDriver->makeKernelCall(linebreakK, {BasisBits}, {LineBreakStream});
321   
322    kernel::Kernel * requiredStreamsK = grepDriver->addKernelInstance(make_unique<kernel::RequiredStreams_UTF8>(idb));
323    StreamSetBuffer * RequiredStreams = grepDriver->addBuffer(make_unique<CircularBuffer>(idb, idb->getStreamSetTy(4, 1), segmentSize * bufferSegments));
324    grepDriver->makeKernelCall(requiredStreamsK, {BasisBits}, {RequiredStreams});
325   
326    const auto n = REs.size();
327   
328    std::vector<std::vector<UCD::UnicodeSet>> charclasses(n);
329
330    for (unsigned i = 0; i < n; i++) {
331        REs[i] = re::resolveNames(REs[i]);
332        std::vector<UCD::UnicodeSet> UnicodeSets = re::collect_UnicodeSets(REs[i]);
333        std::vector<std::vector<unsigned>> exclusiveSetIDs;
334        doMultiplexCCs(UnicodeSets, exclusiveSetIDs, charclasses[i]);
335        REs[i] = multiplex(REs[i], UnicodeSets, exclusiveSetIDs);
336    } 
337
338    std::vector<StreamSetBuffer *> MatchResultsBufs(n);
339
340    for(unsigned i = 0; i < n; ++i){
341        const auto numOfCharacterClasses = charclasses[i].size();
342        StreamSetBuffer * CharClasses = grepDriver->addBuffer(make_unique<CircularBuffer>(idb, idb->getStreamSetTy(numOfCharacterClasses), segmentSize * bufferSegments));
343        kernel::Kernel * ccK = grepDriver->addKernelInstance(make_unique<kernel::CharClassesKernel>(idb, std::move(charclasses[i])));
344        grepDriver->makeKernelCall(ccK, {BasisBits}, {CharClasses});
345        StreamSetBuffer * MatchResults = grepDriver->addBuffer(make_unique<CircularBuffer>(idb, idb->getStreamSetTy(1, 1), segmentSize * bufferSegments));
346        kernel::Kernel * icgrepK = grepDriver->addKernelInstance(make_unique<kernel::ICGrepKernel>(idb, REs[i], numOfCharacterClasses));
347        grepDriver->makeKernelCall(icgrepK, {CharClasses, LineBreakStream, RequiredStreams}, {MatchResults});
348        MatchResultsBufs[i] = MatchResults;
349    }
350    StreamSetBuffer * MergedResults = MatchResultsBufs[0];
351    if (REs.size() > 1) {
352        MergedResults = grepDriver->addBuffer(make_unique<CircularBuffer>(idb, idb->getStreamSetTy(1, 1), segmentSize * bufferSegments));
353        kernel::Kernel * streamsMergeK = grepDriver->addKernelInstance(make_unique<kernel::StreamsMerge>(idb, 1, REs.size()));
354        grepDriver->makeKernelCall(streamsMergeK, MatchResultsBufs, {MergedResults});
355    }
356    StreamSetBuffer * Matches = MergedResults;
357   
358    if (matchesNeedToBeMovedToEOL()) {
359        StreamSetBuffer * OriginalMatches = Matches;
360        kernel::Kernel * matchedLinesK = grepDriver->addKernelInstance(make_unique<kernel::MatchedLinesKernel>(idb));
361        Matches = grepDriver->addBuffer(make_unique<CircularBuffer>(idb, idb->getStreamSetTy(1, 1), segmentSize * bufferSegments));
362        grepDriver->makeKernelCall(matchedLinesK, {OriginalMatches, LineBreakStream}, {Matches});
363    }
364   
365    if (InvertMatchFlag) {
366        kernel::Kernel * invertK = grepDriver->addKernelInstance(make_unique<kernel::InvertMatchesKernel>(idb));
367        StreamSetBuffer * OriginalMatches = Matches;
368        Matches = grepDriver->addBuffer(make_unique<CircularBuffer>(idb, idb->getStreamSetTy(1, 1), segmentSize * bufferSegments));
369        grepDriver->makeKernelCall(invertK, {OriginalMatches, LineBreakStream}, {Matches});
370    }
371    if (MatchLimit > 0) {
372        kernel::Kernel * untilK = grepDriver->addKernelInstance(make_unique<kernel::UntilNkernel>(idb));
373        untilK->setInitialArguments({idb->getSize(MatchLimit)});
374        StreamSetBuffer * AllMatches = Matches;
375        Matches = grepDriver->addBuffer(make_unique<CircularBuffer>(idb, idb->getStreamSetTy(1, 1), segmentSize * bufferSegments));
376        grepDriver->makeKernelCall(untilK, {AllMatches}, {Matches});
377    }
378    return std::pair<StreamSetBuffer *, StreamSetBuffer *>(LineBreakStream, Matches);
379}
380
381
382   
383void GrepEngine::grepCodeGen_nvptx(std::vector<re::RE *> REs, const GrepModeType grepMode, const bool UTF_16) {
384
385    assert (mGrepDriver == nullptr);
386
387    mGrepDriver = new NVPTXDriver("engine");
388    auto & idb = mGrepDriver->getBuilder();
389    Module * M = idb->getModule();
390
391    const unsigned segmentSize = codegen::SegmentSize;
392    const unsigned encodingBits = UTF_16 ? 16 : 8;
393
394    Type * const int64Ty = idb->getInt64Ty();
395    Type * const int32Ty = idb->getInt32Ty();
396    Type * const size_ty = idb->getSizeTy();
397    Type * const sizeTyPtr = PointerType::get(size_ty, 1);
398    Type * const int64tyPtr = PointerType::get(int64Ty, 1);
399    Type * const voidTy = idb->getVoidTy();
400   
401    Function * mainFunc = cast<Function>(M->getOrInsertFunction("Main", voidTy, int64tyPtr, sizeTyPtr, sizeTyPtr, int64tyPtr, nullptr));
402    mainFunc->setCallingConv(CallingConv::C);
403    idb->SetInsertPoint(BasicBlock::Create(M->getContext(), "entry", mainFunc, 0));
404    auto args = mainFunc->arg_begin();
405
406    Value * const inputPtr = &*(args++);
407    inputPtr->setName("inputPtr");
408    Value * const startPointsPtr = &*(args++);
409    startPointsPtr->setName("startPointsPtr");
410    Value * const bufferSizesPtr = &*(args++);
411    bufferSizesPtr->setName("bufferSizesPtr");
412    Value * const outputPtr = &*(args++);
413    outputPtr->setName("outputPtr");
414
415    Function * tidFunc = M->getFunction("llvm.nvvm.read.ptx.sreg.tid.x");
416    Value * tid = idb->CreateCall(tidFunc);
417    Function * bidFunc = cast<Function>(M->getOrInsertFunction("llvm.nvvm.read.ptx.sreg.ctaid.x", int32Ty, nullptr));
418    Value * bid = idb->CreateCall(bidFunc);
419
420    Value * startPoint = idb->CreateLoad(idb->CreateGEP(startPointsPtr, bid));
421    Value * startBlock = idb->CreateUDiv(startPoint, ConstantInt::get(int64Ty, idb->getBitBlockWidth()));
422    Type * const inputStreamType = PointerType::get(ArrayType::get(ArrayType::get(idb->getBitBlockType(), 8), 1), 1);   
423    Value * inputStreamPtr = idb->CreateGEP(idb->CreateBitCast(inputPtr, inputStreamType), startBlock);
424    Value * inputStream = idb->CreateGEP(inputStreamPtr, tid);
425    Value * bufferSize = idb->CreateLoad(idb->CreateGEP(bufferSizesPtr, bid));
426
427    StreamSetBuffer * ByteStream = mGrepDriver->addBuffer(make_unique<SourceBuffer>(idb, idb->getStreamSetTy(1, 8), 1));
428    kernel::Kernel * sourceK = mGrepDriver->addKernelInstance(make_unique<kernel::MemorySourceKernel>(idb, inputStreamType, segmentSize));
429    sourceK->setInitialArguments({inputStream, bufferSize});
430    mGrepDriver->makeKernelCall(sourceK, {}, {ByteStream});
431
432    StreamSetBuffer * Matches = std::get<1>(grepPipeline(mGrepDriver, REs, grepMode, encodingBits, ByteStream));
433   
434    kernel::Kernel * matchCountK = mGrepDriver->addKernelInstance(make_unique<kernel::PopcountKernel>(idb));
435    mGrepDriver->makeKernelCall(matchCountK, {Matches}, {});
436    mGrepDriver->generatePipelineIR();
437    idb->setKernel(matchCountK);
438    Value * matchedLineCount = idb->getAccumulator("countResult");
439    matchedLineCount = idb->CreateZExt(matchedLineCount, int64Ty);
440   
441    Value * strideBlocks = ConstantInt::get(int32Ty, idb->getStride() / idb->getBitBlockWidth());
442    Value * outputThreadPtr = idb->CreateGEP(outputPtr, idb->CreateAdd(idb->CreateMul(bid, strideBlocks), tid));
443    idb->CreateStore(matchedLineCount, outputThreadPtr);
444    mGrepDriver->deallocateBuffers();
445    idb->CreateRetVoid();
446
447    mGrepDriver->finalizeObject();
448}
449
450void GrepEngine::grepCodeGen(std::vector<re::RE *> REs, const GrepModeType grepMode, const bool UTF_16, GrepSource grepSource) {
451
452    assert (mGrepDriver == nullptr);
453    mGrepDriver = new ParabixDriver("engine");
454    auto & idb = mGrepDriver->getBuilder();
455    Module * M = idb->getModule();
456
457    const unsigned segmentSize = codegen::SegmentSize;
458    const unsigned encodingBits = UTF_16 ? 16 : 8;
459
460    Type * const int64Ty = idb->getInt64Ty();
461    Type * const int32Ty = idb->getInt32Ty();
462
463    kernel::Kernel * sourceK = nullptr;
464   
465    Function * mainFunc = cast<Function>(M->getOrInsertFunction("Main", int64Ty, idb->getInt32Ty(), int32Ty, nullptr));
466    mainFunc->setCallingConv(CallingConv::C);
467    idb->SetInsertPoint(BasicBlock::Create(M->getContext(), "entry", mainFunc, 0));
468    auto args = mainFunc->arg_begin();
469
470    Value * const fileDescriptor = &*(args++);
471    fileDescriptor->setName("fileDescriptor");
472    Value * fileIdx = &*(args++);
473    fileIdx->setName("fileIdx");
474
475    StreamSetBuffer * ByteStream = mGrepDriver->addBuffer(make_unique<SourceBuffer>(idb, idb->getStreamSetTy(1, 8)));
476
477    if (grepSource == GrepSource::File) {
478        sourceK = mGrepDriver->addKernelInstance(make_unique<kernel::MMapSourceKernel>(idb, segmentSize));
479    } else {
480        sourceK = mGrepDriver->addKernelInstance(make_unique<kernel::ReadSourceKernel>(idb, segmentSize));
481    }
482    sourceK->setInitialArguments({fileDescriptor});
483
484    mGrepDriver->makeKernelCall(sourceK, {}, {ByteStream});
485   
486    StreamSetBuffer * LineBreakStream;
487    StreamSetBuffer * Matches;
488    std::tie(LineBreakStream, Matches) = grepPipeline(mGrepDriver, REs, grepMode, encodingBits, ByteStream);
489   
490    if (grepMode == NormalMode) {
491        kernel::Kernel * scanMatchK = mGrepDriver->addKernelInstance(make_unique<kernel::ScanMatchKernel>(idb, GrepType::Normal, encodingBits));
492        scanMatchK->setInitialArguments({fileIdx});
493        mGrepDriver->makeKernelCall(scanMatchK, {Matches, LineBreakStream, ByteStream}, {});
494        if (UTF_16) {
495            mGrepDriver->LinkFunction(*scanMatchK, "matcher", &wrapped_report_match<uint16_t>);
496        } else {
497            mGrepDriver->LinkFunction(*scanMatchK, "matcher", &wrapped_report_match<uint8_t>);
498        }
499        mGrepDriver->generatePipelineIR();
500        mGrepDriver->deallocateBuffers();
501
502        idb->CreateRet(idb->getInt64(0));
503    } else {
504        kernel::Kernel * matchCountK = mGrepDriver->addKernelInstance(make_unique<kernel::PopcountKernel>(idb));
505        mGrepDriver->makeKernelCall(matchCountK, {Matches}, {});
506        mGrepDriver->generatePipelineIR();
507        idb->setKernel(matchCountK);
508        Value * matchedLineCount = idb->getAccumulator("countResult");
509        matchedLineCount = idb->CreateZExt(matchedLineCount, int64Ty);
510        mGrepDriver->deallocateBuffers();
511        idb->CreateRet(matchedLineCount);
512    }
513    mGrepDriver->finalizeObject();
514}
515
516GrepEngine::GrepEngine()
517: mGrepDriver(nullptr) {
518
519}
520
521GrepEngine::~GrepEngine() {
522    delete mGrepDriver;
523}
524
525
526   
527static re::CC * parsedCodePointSet = nullptr;
528
529void insert_codepoints(const size_t lineNum, const size_t line_start, const size_t line_end, const char * const buffer) {
530    assert (buffer);
531    assert (line_start <= line_end);
532    re::codepoint_t c = 0;
533    size_t line_pos = line_start;
534    while (isxdigit(buffer[line_pos])) {
535        assert (line_pos < line_end);
536        if (isdigit(buffer[line_pos])) {
537            c = (c << 4) | (buffer[line_pos] - '0');
538        }
539        else {
540            c = (c << 4) | (tolower(buffer[line_pos]) - 'a' + 10);
541        }
542        line_pos++;
543    }
544    assert(((line_pos - line_start) >= 4) && ((line_pos - line_start) <= 6)); // UCD format 4 to 6 hex digits.
545    parsedCodePointSet->insert(c);
546}
547
548re::CC * grepCodepoints(re::RE * pattern, char * UnicodeDataBuffer, size_t bufferLength) {
549    parsedCodePointSet = re::makeCC();       
550    const unsigned segmentSize = 8;
551
552    ParabixDriver pxDriver("codepointEngine");
553    auto & idb = pxDriver.getBuilder();
554    Module * M = idb->getModule();
555   
556    Function * mainFunc = cast<Function>(M->getOrInsertFunction("Main", idb->getVoidTy(), idb->getInt8PtrTy(), idb->getSizeTy(), nullptr));
557    mainFunc->setCallingConv(CallingConv::C);
558    auto args = mainFunc->arg_begin();
559    Value * const buffer = &*(args++);
560    buffer->setName("buffer");
561    Value * length = &*(args++);
562    length->setName("length");
563   
564    idb->SetInsertPoint(BasicBlock::Create(M->getContext(), "entry", mainFunc, 0));
565   
566    StreamSetBuffer * ByteStream = pxDriver.addBuffer(make_unique<SourceBuffer>(idb, idb->getStreamSetTy(1, 8)));
567    kernel::Kernel * sourceK = pxDriver.addKernelInstance(make_unique<kernel::MemorySourceKernel>(idb, idb->getInt8PtrTy(), segmentSize));
568    sourceK->setInitialArguments({buffer, length});
569    pxDriver.makeKernelCall(sourceK, {}, {ByteStream});
570   
571    StreamSetBuffer * BasisBits = pxDriver.addBuffer(make_unique<CircularBuffer>(idb, idb->getStreamSetTy(8, 1), segmentSize));
572   
573    kernel::Kernel * s2pk = pxDriver.addKernelInstance(make_unique<kernel::S2PKernel>(idb));
574    pxDriver.makeKernelCall(s2pk, {ByteStream}, {BasisBits});
575   
576    kernel::Kernel * linebreakK = pxDriver.addKernelInstance(make_unique<kernel::LineBreakKernelBuilder>(idb, 8));
577    StreamSetBuffer * LineBreakStream = pxDriver.addBuffer(make_unique<CircularBuffer>(idb, idb->getStreamSetTy(1, 1), segmentSize));
578    pxDriver.makeKernelCall(linebreakK, {BasisBits}, {LineBreakStream});
579   
580    kernel::Kernel * requiredStreamsK = pxDriver.addKernelInstance(make_unique<kernel::RequiredStreams_UTF8>(idb));
581    StreamSetBuffer * RequiredStreams = pxDriver.addBuffer(make_unique<CircularBuffer>(idb, idb->getStreamSetTy(4, 1), segmentSize));
582    pxDriver.makeKernelCall(requiredStreamsK, {BasisBits}, {RequiredStreams});
583   
584    StreamSetBuffer * MatchResults = pxDriver.addBuffer(make_unique<CircularBuffer>(idb, idb->getStreamSetTy(1, 1), segmentSize));
585    kernel::Kernel * icgrepK = pxDriver.addKernelInstance(make_unique<kernel::ICGrepKernel>(idb, pattern));
586    pxDriver.makeKernelCall(icgrepK, {BasisBits, LineBreakStream, RequiredStreams}, {MatchResults});
587   
588    StreamSetBuffer * MatchedLines = pxDriver.addBuffer(make_unique<CircularBuffer>(idb, idb->getStreamSetTy(1, 1), segmentSize));
589    kernel::Kernel * matchedLinesK = pxDriver.addKernelInstance(make_unique<kernel::MatchedLinesKernel>(idb));
590    pxDriver.makeKernelCall(matchedLinesK, {MatchResults, LineBreakStream}, {MatchedLines});
591   
592    kernel::Kernel * scanMatchK = pxDriver.addKernelInstance(make_unique<kernel::ScanMatchKernel>(idb, GrepType::NameExpression, 8));
593    scanMatchK->setInitialArguments({idb->getInt32(0)});
594    pxDriver.makeKernelCall(scanMatchK, {MatchedLines, LineBreakStream, ByteStream}, {});
595    pxDriver.LinkFunction(*scanMatchK, "matcher", &insert_codepoints);
596    pxDriver.generatePipelineIR();
597    pxDriver.deallocateBuffers();
598    idb->CreateRetVoid();
599    pxDriver.finalizeObject();
600   
601    typedef void (*GrepFunctionType)(const char * buffer, const size_t length);
602    auto f = reinterpret_cast<GrepFunctionType>(pxDriver.getMain());
603    f(UnicodeDataBuffer, bufferLength);
604   
605    return parsedCodePointSet;   
606}
607
608   
// Lines collected by insert_property_values; cleared by grepPropertyValues.
static std::vector<std::string> parsedPropertyValues;

// Callback linked into the scan-match kernel as "matcher": appends the text
// buffer[line_start, line_end) to parsedPropertyValues.  lineNum is unused.
void insert_property_values(size_t lineNum, size_t line_start, size_t line_end, const char * buffer) {
    assert (line_start <= line_end);
    const char * const first = buffer + line_start;
    const size_t count = line_end - line_start;
    parsedPropertyValues.push_back(std::string(first, count));
}
615
616
617const std::vector<std::string> & grepPropertyValues(const std::string& propertyName, re::RE * propertyValuePattern) {
618    ParabixDriver pxDriver("propertyValueEngine");
619    AlignedAllocator<char, 32> alloc;
620
621    parsedPropertyValues.clear();
622
623    const std::string & str = UCD::getPropertyValueGrepString(propertyName);
624
625    auto & idb = pxDriver.getBuilder();
626
627    const unsigned segmentSize = 8;
628    const auto n = str.length();
629    const auto w = idb->getBitBlockWidth() * segmentSize;
630    const auto m = w - (n % w);
631
632    char * aligned = alloc.allocate(n + m, 0);
633    std::memcpy(aligned, str.data(), n);
634    std::memset(aligned + n, 0, m);
635
636    Module * M = idb->getModule();
637   
638    Function * mainFunc = cast<Function>(M->getOrInsertFunction("Main", idb->getVoidTy(), idb->getInt8PtrTy(), idb->getSizeTy(), nullptr));
639    mainFunc->setCallingConv(CallingConv::C);
640    auto args = mainFunc->arg_begin();
641    Value * const buffer = &*(args++);
642    buffer->setName("buffer");
643    Value * length = &*(args++);
644    length->setName("length");
645   
646    idb->SetInsertPoint(BasicBlock::Create(M->getContext(), "entry", mainFunc, 0));
647   
648    StreamSetBuffer * ByteStream = pxDriver.addBuffer(make_unique<SourceBuffer>(idb, idb->getStreamSetTy(1, 8)));
649    kernel::Kernel * sourceK = pxDriver.addKernelInstance(make_unique<kernel::MemorySourceKernel>(idb, idb->getInt8PtrTy(), segmentSize));
650    sourceK->setInitialArguments({buffer, length});
651    pxDriver.makeKernelCall(sourceK, {}, {ByteStream});
652   
653    StreamSetBuffer * BasisBits = pxDriver.addBuffer(make_unique<CircularBuffer>(idb, idb->getStreamSetTy(8, 1), segmentSize));
654   
655    kernel::Kernel * s2pk = pxDriver.addKernelInstance(make_unique<kernel::S2PKernel>(idb));
656    pxDriver.makeKernelCall(s2pk, {ByteStream}, {BasisBits});
657   
658    kernel::Kernel * linebreakK = pxDriver.addKernelInstance(make_unique<kernel::LineBreakKernelBuilder>(idb, 8));
659    StreamSetBuffer * LineBreakStream = pxDriver.addBuffer(make_unique<CircularBuffer>(idb, idb->getStreamSetTy(1, 1), segmentSize));
660    pxDriver.makeKernelCall(linebreakK, {BasisBits}, {LineBreakStream});
661   
662    kernel::Kernel * requiredStreamsK = pxDriver.addKernelInstance(make_unique<kernel::RequiredStreams_UTF8>(idb));
663    StreamSetBuffer * RequiredStreams = pxDriver.addBuffer(make_unique<CircularBuffer>(idb, idb->getStreamSetTy(4, 1), segmentSize));
664    pxDriver.makeKernelCall(requiredStreamsK, {BasisBits}, {RequiredStreams});
665   
666    StreamSetBuffer * MatchResults = pxDriver.addBuffer(make_unique<CircularBuffer>(idb, idb->getStreamSetTy(1, 1), segmentSize));
667    kernel::Kernel * icgrepK = pxDriver.addKernelInstance(make_unique<kernel::ICGrepKernel>(idb, propertyValuePattern));
668    pxDriver.makeKernelCall(icgrepK, {BasisBits, LineBreakStream, RequiredStreams}, {MatchResults});
669
670    StreamSetBuffer * MatchedLines = pxDriver.addBuffer(make_unique<CircularBuffer>(idb, idb->getStreamSetTy(1, 1), segmentSize));
671    kernel::Kernel * matchedLinesK = pxDriver.addKernelInstance(make_unique<kernel::MatchedLinesKernel>(idb));
672    pxDriver.makeKernelCall(matchedLinesK, {MatchResults, LineBreakStream}, {MatchedLines});
673
674    kernel::Kernel * scanMatchK = pxDriver.addKernelInstance(make_unique<kernel::ScanMatchKernel>(idb, GrepType::PropertyValue, 8));
675    scanMatchK->setInitialArguments({idb->getInt32(0)});
676    pxDriver.makeKernelCall(scanMatchK, {MatchedLines, LineBreakStream, ByteStream}, {});
677    pxDriver.LinkFunction(*scanMatchK, "matcher", &insert_property_values);
678    pxDriver.generatePipelineIR();
679    pxDriver.deallocateBuffers();
680    idb->CreateRetVoid();
681    pxDriver.finalizeObject();
682
683    typedef void (*GrepFunctionType)(const char * buffer, const size_t length);
684    auto f = reinterpret_cast<GrepFunctionType>(pxDriver.getMain());
685    f(aligned, n);
686   
687    alloc.deallocate(aligned, 0);
688    return parsedPropertyValues;
689}
690
691   
692}
Note: See TracBrowser for help on using the repository browser.