source: icGREP/icgrep-devel/icgrep/grep_engine.cpp @ 5464

Last change on this file since 5464 was 5464, checked in by nmedfort, 2 years ago

Restructuring work for the Driver classes. Start of work to eliminate the memory leaks with the ExecutionEngine. Replaced custom AlignedMalloc with backend call to std::aligned_malloc. Salvaged some work on DistributionPass for reevaluation.

File size: 21.9 KB
Line 
1/*
2 *  Copyright (c) 2016 International Characters.
3 *  This software is licensed to the public under the Open Software License 3.0.
4 *  icgrep is a trademark of International Characters.
5 */
6
7#include "grep_engine.h"
8#include <llvm/IR/Module.h>
9#include <llvm/Support/CommandLine.h>
10#include <boost/filesystem.hpp>
11#include <UCD/UnicodeNameData.h>
12#include <UCD/resolve_properties.h>
13#include <kernels/cc_kernel.h>
14#include <kernels/grep_kernel.h>
15#include <kernels/linebreak_kernel.h>
16#include <kernels/streams_merge.h>
17#include <kernels/match_count.h>
18#include <kernels/source_kernel.h>
19#include <kernels/s2p_kernel.h>
20#include <kernels/scanmatchgen.h>
21#include <kernels/streamset.h>
22#include <kernels/until_n.h>
23#include <kernels/kernel_builder.h>
24#include <pablo/pablo_kernel.h>
25#include <re/re_cc.h>
26#include <re/re_toolchain.h>
27#include <toolchain/toolchain.h>
28#include <toolchain/cpudriver.h>
29#include <toolchain/NVPTXDriver.h>
30#include <iostream>
31#include <sstream>
32#include <cc/multiplex_CCs.h>
33#include <llvm/Support/raw_ostream.h>
34#include <util/aligned_allocator.h>
35#include <sys/stat.h>
36#include <fcntl.h>
37
38#ifdef CUDA_ENABLED
39#include <preprocess.cpp>
40#include <IR_Gen/CudaDriver.h>
41#endif
42
43using namespace parabix;
44using namespace llvm;
45
46static cl::OptionCategory bGrepOutputOptions("Output Options",
47                                             "These options control the output.");
48static cl::opt<bool> SilenceFileErrors("s", cl::desc("Suppress messages for file errors."), cl::init(false),  cl::cat(bGrepOutputOptions));
49
50static cl::opt<bool> SuppressOutput("q", cl::desc("Suppress normal output; set return code only."), cl::init(false),  cl::cat(bGrepOutputOptions));
51
52static cl::opt<bool> NormalizeLineBreaks("normalize-line-breaks", cl::desc("Normalize line breaks to std::endl."), cl::init(false),  cl::cat(bGrepOutputOptions));
53
54static cl::opt<bool> ShowFileNames("H", cl::desc("Show the file name with each matching line."), cl::cat(bGrepOutputOptions));
55static cl::alias ShowFileNamesLong("with-filename", cl::desc("Alias for -H"), cl::aliasopt(ShowFileNames));
56
57static cl::opt<bool> ShowLineNumbers("n", cl::desc("Show the line number with each matching line."), cl::cat(bGrepOutputOptions));
58static cl::alias ShowLineNumbersLong("line-number", cl::desc("Alias for -n"), cl::aliasopt(ShowLineNumbers));
59
60static cl::opt<int> MaxCount("m", cl::desc("Limit the number of matches per file."), cl::cat(bGrepOutputOptions), cl::init((size_t) -1));
61static cl::alias MaxCountLong("max-count", cl::desc("Alias for -m"), cl::aliasopt(MaxCount));
62
63static re::CC * parsedCodePointSet = nullptr;
64
65static std::vector<std::string> parsedPropertyValues;
66
67std::string PTXFilename = "icgrep.ptx";
68size_t * startPoints = nullptr;
69size_t * accumBytes = nullptr;
70
71void GrepEngine::doGrep(const std::string & fileName) const{
72#ifdef CUDA_ENABLED
73    const bool CountOnly = true;
74    boost::filesystem::path file(fileName);
75    if (exists(file)) {
76        if (is_directory(file)) {
77            return;
78        }
79    } else {
80        if (!SilenceFileErrors) {
81            std::cerr << "Error: cannot open " << fileName << " for processing. Skipped.\n";
82            return;
83        }
84    }
85
86    const auto fileSize = file_size(file);
87   
88    if (fileSize > 0) {
89        try {
90            boost::iostreams::mapped_file_source source(fileName, fileSize, 0);
91            char * fileBuffer = const_cast<char *>(source.data());
92           
93            codegen::BlockSize = 128;
94            std::vector<size_t> LFPositions = preprocess(fileBuffer, fileSize);
95           
96            const unsigned numOfGroups = codegen::GroupNum;
97            if (posix_memalign((void**)&startPoints, 8, (numOfGroups+1)*sizeof(size_t)) ||
98                posix_memalign((void**)&accumBytes, 8, (numOfGroups+1)*sizeof(size_t))) {
99                std::cerr << "Cannot allocate memory for startPoints or accumBytes.\n";
100                exit(-1);
101            }
102
103            ulong * rslt = RunPTX(PTXFilename, fileBuffer, fileSize, CountOnly, LFPositions, startPoints, accumBytes);
104            source.close();
105        } catch (std::exception & e) {
106            if (!SilenceFileErrors) {
107                std::cerr << "Boost mmap error: " + fileName + ": " + e.what() + " Skipped.\n";
108                return;
109            }
110        }
111    } else {
112        std::cout << 0 << std::endl;
113    }
114#endif
115}
116
117uint64_t GrepEngine::doGrep(const std::string & fileName, const uint32_t fileIdx) const {
118    const int32_t fd = open(fileName.c_str(), O_RDONLY);
119    if (LLVM_UNLIKELY(fd == -1)) {
120        return 0;
121    }
122    const auto result = doGrep(fd, fileIdx);
123    close(fd);
124    return result;
125}
126
127uint64_t GrepEngine::doGrep(const int32_t fileDescriptor, const uint32_t fileIdx) const {
128    assert (mGrepFunction);
129    typedef uint64_t (*GrepFunctionType)(int32_t fileDescriptor, const uint32_t fileIdx);
130    return reinterpret_cast<GrepFunctionType>(mGrepFunction)(fileDescriptor, fileIdx);
131}
132
133void GrepEngine::doGrep(const char * buffer, const uint64_t length, const uint32_t fileIdx) const {
134    assert (mGrepFunction);
135    typedef uint64_t (*GrepFunctionType)(const char * buffer, const uint64_t length, const uint32_t fileIdx);
136    reinterpret_cast<GrepFunctionType>(mGrepFunction)(buffer, length, fileIdx);
137}
138
// Per-file state set up by initFileResult(); every array is indexed by file index.
static int * total_count = nullptr;                // one counter per input file
static std::stringstream * resultStrs = nullptr;   // buffered output per input file
static std::vector<std::string> inputFiles;        // names of the input files
142
143void initFileResult(std::vector<std::string> filenames){
144    const int n = filenames.size();
145    if (n > 1) {
146        ShowFileNames = true;
147    }
148    inputFiles = filenames;
149    resultStrs = new std::stringstream[n];
150    total_count = new int[n];
151    for (unsigned i = 0; i < inputFiles.size(); ++i){
152        total_count[i] = 0;
153    }
154
155}
156
157template<typename CodeUnit>
158void wrapped_report_match(const size_t lineNum, size_t line_start, size_t line_end, const CodeUnit * const buffer, const size_t filesize, const size_t fileIdx) {
159
160//    errs().write_hex((size_t)buffer) << " : " << lineNum << " (" << line_start << ", " << line_end << ", " << filesize << ")\n";
161
162    assert (buffer);
163    assert (line_start <= line_end);
164    assert (line_end <= filesize);
165
166    if (ShowFileNames) {
167        resultStrs[fileIdx] << inputFiles[fileIdx] << ':';
168    }
169    if (ShowLineNumbers) {
170        // Internally line numbers are counted from 0.  For display, adjust
171        // the line number so that lines are numbered from 1.
172        resultStrs[fileIdx] << lineNum+1 << ":";
173    }
174
175    // If the line "starts" on the LF of a CRLF, it is actually the end of the last line.
176    if ((buffer[line_start] == 0xA) && (line_start != line_end)) {
177        ++line_start;
178    }
179
180    if (LLVM_UNLIKELY(line_end == filesize)) {
181        // The match position is at end-of-file.   We have a final unterminated line.
182        resultStrs[fileIdx].write((char *)&buffer[line_start], (line_end - line_start) * sizeof(CodeUnit));
183        if (NormalizeLineBreaks) {
184            resultStrs[fileIdx] << '\n';  // terminate it
185        }
186    } else {
187        const auto end_byte = buffer[line_end];
188        if (NormalizeLineBreaks) {
189            if (LLVM_UNLIKELY(end_byte == 0x85)) {
190                // Line terminated with NEL, on the second byte.  Back up 1.
191                line_end -= 1;
192            } else if (LLVM_UNLIKELY(end_byte > 0xD)) {
193                // Line terminated with PS or LS, on the third byte.  Back up 2.
194                line_end -= 2;
195            }
196            resultStrs[fileIdx].write((char *)&buffer[line_start], (line_end - line_start) * sizeof(CodeUnit));
197            resultStrs[fileIdx] << '\n';
198        } else {
199            if (end_byte == 0x0D) {
200                // Check for line_end on first byte of CRLF; we don't want to access past the end of buffer.
201                if ((line_end + 1) < filesize) {
202                    if (buffer[line_end + 1] == 0x0A) {
203                        // Found CRLF; preserve both bytes.
204                        ++line_end;
205                    }
206                }
207            }
208            resultStrs[fileIdx].write((char *)&buffer[line_start], (line_end - line_start + 1) * sizeof(CodeUnit));
209        }
210    }
211}
212
213void PrintResult(bool CountOnly, std::vector<size_t> & total_CountOnly){
214    if (CountOnly) {
215        if (!ShowFileNames) {
216            for (unsigned i = 0; i < inputFiles.size(); ++i){
217                std::cout << total_CountOnly[i] << std::endl;
218            }
219        } else {
220            for (unsigned i = 0; i < inputFiles.size(); ++i){
221                std::cout << inputFiles[i] << ':' << total_CountOnly[i] << std::endl;
222            };
223        }
224    } else {
225        for (unsigned i = 0; i < inputFiles.size(); ++i){
226            std::cout << resultStrs[i].str();
227        }
228    }
229}
230
231void insert_codepoints(const size_t lineNum, const size_t line_start, const size_t line_end, const char * const buffer) {
232    assert (buffer);
233    assert (line_start <= line_end);
234    re::codepoint_t c = 0;
235    size_t line_pos = line_start;
236    while (isxdigit(buffer[line_pos])) {
237        assert (line_pos < line_end);
238        if (isdigit(buffer[line_pos])) {
239            c = (c << 4) | (buffer[line_pos] - '0');
240        }
241        else {
242            c = (c << 4) | (tolower(buffer[line_pos]) - 'a' + 10);
243        }
244        line_pos++;
245    }
246    assert(((line_pos - line_start) >= 4) && ((line_pos - line_start) <= 6)); // UCD format 4 to 6 hex digits.
247    parsedCodePointSet->insert(c);
248}
249
250void insert_property_values(size_t lineNum, size_t line_start, size_t line_end, const char * buffer) {
251    assert (line_start <= line_end);
252    parsedPropertyValues.emplace_back(buffer + line_start, buffer + line_end);
253}
254
255void GrepEngine::grepCodeGen_nvptx(std::vector<re::RE *> REs, const bool CountOnly, const bool UTF_16) {
256
257    NVPTXDriver pxDriver("engine");
258    auto & idb = pxDriver.getBuilder();
259    Module * M = idb->getModule();
260
261    const unsigned segmentSize = codegen::SegmentSize;
262    const unsigned bufferSegments = codegen::BufferSegments * codegen::ThreadNum;
263    const unsigned encodingBits = UTF_16 ? 16 : 8;
264
265    Type * const int64Ty = idb->getInt64Ty();
266    Type * const int32Ty = idb->getInt32Ty();
267    Type * const size_ty = idb->getSizeTy();
268    Type * const sizeTyPtr = PointerType::get(size_ty, 1);
269    Type * const int64tyPtr = PointerType::get(int64Ty, 1);
270    Type * const voidTy = idb->getVoidTy();
271
272    Function * mainFunc = cast<Function>(M->getOrInsertFunction("Main", voidTy, int64tyPtr, sizeTyPtr, sizeTyPtr, int64tyPtr, nullptr));
273    mainFunc->setCallingConv(CallingConv::C);
274    idb->SetInsertPoint(BasicBlock::Create(M->getContext(), "entry", mainFunc, 0));
275    auto args = mainFunc->arg_begin();
276
277    Value * const inputPtr = &*(args++);
278    inputPtr->setName("inputPtr");
279    Value * const startPointsPtr = &*(args++);
280    startPointsPtr->setName("startPointsPtr");
281    Value * const bufferSizesPtr = &*(args++);
282    bufferSizesPtr->setName("bufferSizesPtr");
283    Value * const outputPtr = &*(args++);
284    outputPtr->setName("outputPtr");
285
286    Function * tidFunc = M->getFunction("llvm.nvvm.read.ptx.sreg.tid.x");
287    Value * tid = idb->CreateCall(tidFunc);
288    Function * bidFunc = cast<Function>(M->getOrInsertFunction("llvm.nvvm.read.ptx.sreg.ctaid.x", int32Ty, nullptr));
289    Value * bid = idb->CreateCall(bidFunc);
290
291    Value * startPoint = idb->CreateLoad(idb->CreateGEP(startPointsPtr, bid));
292    Value * startBlock = idb->CreateUDiv(startPoint, ConstantInt::get(int64Ty, idb->getBitBlockWidth()));
293    Type * const inputStreamType = PointerType::get(ArrayType::get(ArrayType::get(idb->getBitBlockType(), 8), 1), 1);   
294    Value * inputStreamPtr = idb->CreateGEP(idb->CreateBitCast(inputPtr, inputStreamType), startBlock);
295    Value * inputStream = idb->CreateGEP(inputStreamPtr, tid);
296    Value * bufferSize = idb->CreateLoad(idb->CreateGEP(bufferSizesPtr, bid));
297
298    StreamSetBuffer * ByteStream = pxDriver.addBuffer(make_unique<SourceBuffer>(idb, idb->getStreamSetTy(1, 8), 1));
299    kernel::Kernel * sourceK = pxDriver.addKernelInstance(make_unique<kernel::MemorySourceKernel>(idb, inputStreamType, segmentSize));
300    sourceK->setInitialArguments({inputStream, bufferSize});
301    pxDriver.makeKernelCall(sourceK, {}, {ByteStream});
302
303    StreamSetBuffer * BasisBits = pxDriver.addBuffer(make_unique<CircularBuffer>(idb, idb->getStreamSetTy(8, 1), segmentSize * bufferSegments));   
304    kernel::Kernel * s2pk = pxDriver.addKernelInstance(make_unique<kernel::S2PKernel>(idb));
305    pxDriver.makeKernelCall(s2pk, {ByteStream}, {BasisBits});
306 
307    StreamSetBuffer * LineBreakStream = pxDriver.addBuffer(make_unique<CircularBuffer>(idb, idb->getStreamSetTy(1, 1), segmentSize * bufferSegments));   
308    kernel::Kernel * linebreakK = pxDriver.addKernelInstance(make_unique<kernel::LineBreakKernelBuilder>(idb, encodingBits));
309    pxDriver.makeKernelCall(linebreakK, {BasisBits}, {LineBreakStream});
310   
311    const auto n = REs.size();
312
313    std::vector<StreamSetBuffer *> MatchResultsBufs(n);
314
315    for(unsigned i = 0; i < n; ++i){
316        StreamSetBuffer * MatchResults = pxDriver.addBuffer(make_unique<CircularBuffer>(idb, idb->getStreamSetTy(1, 1), segmentSize * bufferSegments));
317        kernel::Kernel * icgrepK = pxDriver.addKernelInstance(make_unique<kernel::ICGrepKernel>(idb, REs[i]));
318        pxDriver.makeKernelCall(icgrepK, {BasisBits, LineBreakStream}, {MatchResults});
319        MatchResultsBufs[i] = MatchResults;
320    }
321    StreamSetBuffer * MergedResults = MatchResultsBufs[0];
322    if (REs.size() > 1) {
323        MergedResults = pxDriver.addBuffer(make_unique<CircularBuffer>(idb, idb->getStreamSetTy(1, 1), segmentSize * bufferSegments));
324        kernel::Kernel * streamsMergeK = pxDriver.addKernelInstance(make_unique<kernel::StreamsMerge>(idb, 1, REs.size()));
325        pxDriver.makeKernelCall(streamsMergeK, MatchResultsBufs, {MergedResults});
326    }
327
328    kernel::MatchCount matchCountK(idb);
329    pxDriver.addKernelCall(matchCountK, {MergedResults}, {});
330    pxDriver.generatePipelineIR();
331
332    idb->setKernel(&matchCountK);
333    Value * matchedLineCount = idb->getScalarField("matchedLineCount");
334    matchedLineCount = idb->CreateZExt(matchedLineCount, int64Ty);
335   
336    Value * strideBlocks = ConstantInt::get(int32Ty, idb->getStride() / idb->getBitBlockWidth());
337    Value * outputThreadPtr = idb->CreateGEP(outputPtr, idb->CreateAdd(idb->CreateMul(bid, strideBlocks), tid));
338    idb->CreateStore(matchedLineCount, outputThreadPtr);
339    idb->CreateRetVoid();
340
341    pxDriver.finalizeAndCompile(mainFunc, PTXFilename);
342}
343
344void GrepEngine::grepCodeGen(std::vector<re::RE *> REs, const bool CountOnly, const bool UTF_16, GrepSource grepSource, const GrepType grepType) {
345
346    ParabixDriver pxDriver("engine");
347    auto & idb = pxDriver.getBuilder();
348    Module * M = idb->getModule();
349
350    const unsigned segmentSize = codegen::SegmentSize;
351    const unsigned bufferSegments = codegen::BufferSegments * codegen::ThreadNum;
352    const unsigned encodingBits = UTF_16 ? 16 : 8;
353
354    Type * const int64Ty = idb->getInt64Ty();
355    Type * const int32Ty = idb->getInt32Ty();
356
357    Function * mainFunc = nullptr;
358    Value * fileIdx = nullptr;
359    StreamSetBuffer * ByteStream = nullptr;
360    kernel::Kernel * sourceK = nullptr;
361
362    if (grepSource == GrepSource::Internal) {
363
364        mainFunc = cast<Function>(M->getOrInsertFunction("Main", int64Ty, idb->getInt8PtrTy(), int64Ty, int32Ty, nullptr));
365        mainFunc->setCallingConv(CallingConv::C);
366        idb->SetInsertPoint(BasicBlock::Create(M->getContext(), "entry", mainFunc, 0));
367        auto args = mainFunc->arg_begin();
368
369        Value * const buffer = &*(args++);
370        buffer->setName("buffer");
371
372        Value * length = &*(args++);
373        length->setName("length");
374        length = idb->CreateZExtOrTrunc(length, idb->getSizeTy());
375
376        fileIdx = &*(args++);
377        fileIdx->setName("fileIdx");
378
379        ByteStream = pxDriver.addBuffer(make_unique<SourceBuffer>(idb, idb->getStreamSetTy(1, 8)));
380
381        sourceK = pxDriver.addKernelInstance(make_unique<kernel::MemorySourceKernel>(idb, idb->getInt8PtrTy(), segmentSize));
382        sourceK->setInitialArguments({buffer, length});
383
384    } else {
385
386        mainFunc = cast<Function>(M->getOrInsertFunction("Main", int64Ty, idb->getInt32Ty(), int32Ty, nullptr));
387        mainFunc->setCallingConv(CallingConv::C);
388        idb->SetInsertPoint(BasicBlock::Create(M->getContext(), "entry", mainFunc, 0));
389        auto args = mainFunc->arg_begin();
390
391        Value * const fileDescriptor = &*(args++);
392        fileDescriptor->setName("fileDescriptor");
393        fileIdx = &*(args++);
394        fileIdx->setName("fileIdx");
395
396        ByteStream = pxDriver.addBuffer(make_unique<SourceBuffer>(idb, idb->getStreamSetTy(1, 8)));
397
398        if (grepSource == GrepSource::File) {
399            sourceK = pxDriver.addKernelInstance(make_unique<kernel::MMapSourceKernel>(idb, segmentSize));
400            sourceK->setInitialArguments({fileDescriptor});
401        } else { // if (grepSource == GrepSource::StdIn) {
402            sourceK = pxDriver.addKernelInstance(make_unique<kernel::ReadSourceKernel>(idb, segmentSize));
403            sourceK->setInitialArguments({idb->getInt32(STDIN_FILENO)});
404        }
405    }
406
407    pxDriver.makeKernelCall(sourceK, {}, {ByteStream});
408    StreamSetBuffer * BasisBits = pxDriver.addBuffer(make_unique<CircularBuffer>(idb, idb->getStreamSetTy(8, 1), segmentSize * bufferSegments));
409   
410    kernel::Kernel * s2pk = pxDriver.addKernelInstance(make_unique<kernel::S2PKernel>(idb));
411    pxDriver.makeKernelCall(s2pk, {ByteStream}, {BasisBits});
412   
413    kernel::Kernel * linebreakK = pxDriver.addKernelInstance(make_unique<kernel::LineBreakKernelBuilder>(idb, encodingBits));
414    StreamSetBuffer * LineBreakStream = pxDriver.addBuffer(make_unique<CircularBuffer>(idb, idb->getStreamSetTy(1, 1), segmentSize * bufferSegments));
415    pxDriver.makeKernelCall(linebreakK, {BasisBits}, {LineBreakStream});
416   
417    const auto n = REs.size();
418
419    std::vector<StreamSetBuffer *> MatchResultsBufs(n);
420
421    for(unsigned i = 0; i < n; ++i){
422        StreamSetBuffer * MatchResults = pxDriver.addBuffer(make_unique<CircularBuffer>(idb, idb->getStreamSetTy(1, 1), segmentSize * bufferSegments));
423        kernel::Kernel * icgrepK = pxDriver.addKernelInstance(make_unique<kernel::ICGrepKernel>(idb, REs[i]));
424        pxDriver.makeKernelCall(icgrepK, {BasisBits, LineBreakStream}, {MatchResults});
425        MatchResultsBufs[i] = MatchResults;
426    }
427    StreamSetBuffer * MergedResults = MatchResultsBufs[0];
428    if (REs.size() > 1) {
429        MergedResults = pxDriver.addBuffer(make_unique<CircularBuffer>(idb, idb->getStreamSetTy(1, 1), segmentSize * bufferSegments));
430        kernel::Kernel * streamsMergeK = pxDriver.addKernelInstance(make_unique<kernel::StreamsMerge>(idb, 1, REs.size()));
431        pxDriver.makeKernelCall(streamsMergeK, MatchResultsBufs, {MergedResults});
432    }
433   
434    if (AlgorithmOptionIsSet(re::InvertMatches)) {
435        kernel::Kernel * invertK = pxDriver.addKernelInstance(make_unique<kernel::InvertMatchesKernel>(idb));
436        StreamSetBuffer * OriginalMatches = MergedResults;
437        MergedResults = pxDriver.addBuffer(make_unique<CircularBuffer>(idb, idb->getStreamSetTy(1, 1), segmentSize * bufferSegments));
438        pxDriver.makeKernelCall(invertK, {OriginalMatches, LineBreakStream}, {MergedResults});
439    }
440    if (MaxCount > 0) {
441        kernel::Kernel * untilK = pxDriver.addKernelInstance(make_unique<kernel::UntilNkernel>(idb));
442        untilK->setInitialArguments({idb->getSize(MaxCount)});
443        StreamSetBuffer * AllMatches = MergedResults;
444        MergedResults = pxDriver.addBuffer(make_unique<CircularBuffer>(idb, idb->getStreamSetTy(1, 1), segmentSize * bufferSegments));
445        pxDriver.makeKernelCall(untilK, {AllMatches}, {MergedResults});
446    }
447    if (CountOnly) {
448        kernel::MatchCount matchCountK(idb);
449        pxDriver.addKernelCall(matchCountK, {MergedResults}, {});
450        pxDriver.generatePipelineIR();
451        idb->setKernel(&matchCountK);
452        Value * matchedLineCount = idb->getScalarField("matchedLineCount");
453        matchedLineCount = idb->CreateZExt(matchedLineCount, int64Ty);
454        idb->CreateRet(matchedLineCount);
455        pxDriver.linkAndFinalize();
456    } else {
457        kernel::ScanMatchKernel scanMatchK(idb, grepType, encodingBits);
458        scanMatchK.setInitialArguments({fileIdx});
459        pxDriver.addKernelCall(scanMatchK, {MergedResults, LineBreakStream, ByteStream}, {});
460        switch (grepType) {
461            case GrepType::Normal:
462                if (UTF_16) {
463                    pxDriver.LinkFunction(scanMatchK, "matcher", &wrapped_report_match<uint16_t>);
464                } else {
465                    pxDriver.LinkFunction(scanMatchK, "matcher", &wrapped_report_match<uint8_t>);
466                }
467                break;
468            case GrepType::NameExpression:
469                pxDriver.LinkFunction(scanMatchK, "matcher", &insert_codepoints);
470                break;
471            case GrepType::PropertyValue:
472                pxDriver.LinkFunction(scanMatchK, "matcher", &insert_property_values);
473                break;
474        }
475        pxDriver.generatePipelineIR();
476        idb->CreateRet(idb->getInt64(0));
477        pxDriver.linkAndFinalize();
478    }
479
480    mGrepFunction = pxDriver.getPointerToMain();
481}
482
483
484re::CC * GrepEngine::grepCodepoints() {
485    parsedCodePointSet = re::makeCC();
486    char * mFileBuffer = getUnicodeNameDataPtr();
487    size_t mFileSize = getUnicodeNameDataSize();
488    doGrep(mFileBuffer, mFileSize, 0);
489    return parsedCodePointSet;
490}
491
492const std::vector<std::string> & GrepEngine::grepPropertyValues(const std::string& propertyName) {
493    enum { MaxSupportedVectorWidthInBytes = 32 };
494    AlignedAllocator<char, MaxSupportedVectorWidthInBytes> alloc;
495    parsedPropertyValues.clear();
496    const std::string & str = UCD::getPropertyValueGrepString(propertyName);
497    const auto n = str.length();
498    // NOTE: MaxSupportedVectorWidthInBytes of trailing 0s are needed to prevent the grep function from
499    // erroneously matching garbage data when loading the final partial block.
500    char * aligned = alloc.allocate(n + MaxSupportedVectorWidthInBytes, 0);
501    std::memcpy(aligned, str.data(), n);
502    std::memset(aligned + n, 0, MaxSupportedVectorWidthInBytes);
503    doGrep(aligned, n, 0);
504    alloc.deallocate(aligned, 0);
505    return parsedPropertyValues;
506}
507
508GrepEngine::GrepEngine()
509: mGrepFunction(nullptr) {
510
511}
Note: See TracBrowser for help on using the repository browser.