source: icGREP/icgrep-devel/icgrep/grep_engine.cpp @ 5481

Last change on this file since 5481 was 5481, checked in by cameron, 2 years ago

Refactoring grepEngine: separate out codepoint/property value grep

File size: 26.5 KB
Line 
1/*
2 *  Copyright (c) 2017 International Characters.
3 *  This software is licensed to the public under the Open Software License 3.0.
4 *  icgrep is a trademark of International Characters.
5 */
6
7#include "grep_engine.h"
8#include "grep_interface.h"
9#include <llvm/IR/Module.h>
10#include <boost/filesystem.hpp>
11#include <UCD/UnicodeNameData.h>
12#include <UCD/resolve_properties.h>
13#include <kernels/cc_kernel.h>
14#include <kernels/grep_kernel.h>
15#include <kernels/linebreak_kernel.h>
16#include <kernels/streams_merge.h>
17#include <kernels/match_count.h>
18#include <kernels/source_kernel.h>
19#include <kernels/s2p_kernel.h>
20#include <kernels/scanmatchgen.h>
21#include <kernels/streamset.h>
22#include <kernels/until_n.h>
23#include <kernels/kernel_builder.h>
24#include <pablo/pablo_kernel.h>
25#include <re/re_cc.h>
26#include <re/re_toolchain.h>
27#include <toolchain/toolchain.h>
28#include <toolchain/cpudriver.h>
29#include <toolchain/NVPTXDriver.h>
30#include <iostream>
31#include <sstream>
32#include <cc/multiplex_CCs.h>
33#include <llvm/Support/raw_ostream.h>
34#include <util/aligned_allocator.h>
35#include <sys/stat.h>
36#include <fcntl.h>
37
38#ifdef CUDA_ENABLED
39#include <preprocess.cpp>
40#include <IR_Gen/CudaDriver.h>
41#endif
42
43using namespace parabix;
44using namespace llvm;
45
46namespace grep {
47
48
// Scratch arrays used by the CUDA build of GrepEngine::doGrep(const std::string &):
// that function allocates them with posix_memalign and passes them to RunPTX.
// Both are null until that code path runs.
size_t * startPoints{nullptr};
size_t * accumBytes{nullptr};
51
52
53void GrepEngine::doGrep(const std::string & fileName) const{
54#ifdef CUDA_ENABLED
55    const bool CountOnly = true;
56    boost::filesystem::path file(fileName);
57    if (exists(file)) {
58        if (is_directory(file)) {
59            return;
60        }
61    } else {
62        if (!NoMessagesFlag) {
63            std::cerr << "Error: cannot open " << fileName << " for processing. Skipped.\n";
64            return;
65        }
66    }
67
68    const auto fileSize = file_size(file);
69   
70    if (fileSize > 0) {
71        try {
72            boost::iostreams::mapped_file_source source(fileName, fileSize, 0);
73            char * fileBuffer = const_cast<char *>(source.data());
74           
75            codegen::BlockSize = 128;
76            std::vector<size_t> LFPositions = preprocess(fileBuffer, fileSize);
77           
78            const unsigned numOfGroups = codegen::GroupNum;
79            if (posix_memalign((void**)&startPoints, 8, (numOfGroups+1)*sizeof(size_t)) ||
80                posix_memalign((void**)&accumBytes, 8, (numOfGroups+1)*sizeof(size_t))) {
81                std::cerr << "Cannot allocate memory for startPoints or accumBytes.\n";
82                exit(-1);
83            }
84            const auto PTXFilename = mGrepDriver->getBuilder()->getModule()->getModuleIdentifier() + ".ptx";
85            ulong * rslt = RunPTX(PTXFilename, fileBuffer, fileSize, CountOnly, LFPositions, startPoints, accumBytes);
86            source.close();
87        } catch (std::exception & e) {
88            if (!NoMessagesFlag) {
89                std::cerr << "Boost mmap error: " + fileName + ": " + e.what() + " Skipped.\n";
90                return;
91            }
92        }
93    } else {
94        std::cout << 0 << std::endl;
95    }
96#endif
97}
98
99uint64_t GrepEngine::doGrep(const std::string & fileName, const uint32_t fileIdx) const {
100    const int32_t fd = open(fileName.c_str(), O_RDONLY);
101    if (LLVM_UNLIKELY(fd == -1)) {
102        return 0;
103    }
104    const auto result = doGrep(fd, fileIdx);
105    close(fd);
106    return result;
107}
108
109uint64_t GrepEngine::doGrep(const int32_t fileDescriptor, const uint32_t fileIdx) const {
110    assert (mGrepDriver);
111    typedef uint64_t (*GrepFunctionType)(int32_t fileDescriptor, const uint32_t fileIdx);
112    auto f = reinterpret_cast<GrepFunctionType>(mGrepDriver->getMain());
113    return f(fileDescriptor, fileIdx);
114}
115
// File-local state shared by initFileResult, wrapped_report_match and PrintResult.
// initFileResult sizes the two arrays to one slot per input file.
static int * total_count = nullptr;                // zeroed by initFileResult
static std::stringstream * resultStrs = nullptr;   // accumulated output text, one stream per file
static std::vector<std::string> inputFiles;        // file names, in the order given to initFileResult
119
120void initFileResult(std::vector<std::string> filenames){
121    const int n = filenames.size();
122    if ((n > 1) && !NoFilenameFlag) {
123        WithFilenameFlag = true;
124    }
125    inputFiles = filenames;
126    resultStrs = new std::stringstream[n];
127    total_count = new int[n];
128    for (unsigned i = 0; i < inputFiles.size(); ++i){
129        total_count[i] = 0;
130    }
131
132}
133
134template<typename CodeUnit>
135void wrapped_report_match(const size_t lineNum, size_t line_start, size_t line_end, const CodeUnit * const buffer, const size_t filesize, const size_t fileIdx) {
136
137//    errs().write_hex((size_t)buffer) << " : " << lineNum << " (" << line_start << ", " << line_end << ", " << filesize << ")\n";
138
139    assert (buffer);
140    assert (line_start <= line_end);
141    assert (line_end <= filesize);
142
143    if (WithFilenameFlag) {
144        resultStrs[fileIdx] << inputFiles[fileIdx] << ':';
145    }
146    if (LineNumberFlag) {
147        // Internally line numbers are counted from 0.  For display, adjust
148        // the line number so that lines are numbered from 1.
149        resultStrs[fileIdx] << lineNum+1 << ":";
150    }
151
152    // If the line "starts" on the LF of a CRLF, it is actually the end of the last line.
153    if ((buffer[line_start] == 0xA) && (line_start != line_end)) {
154        ++line_start;
155    }
156
157    if (LLVM_UNLIKELY(line_end == filesize)) {
158        // The match position is at end-of-file.   We have a final unterminated line.
159        resultStrs[fileIdx].write((char *)&buffer[line_start], (line_end - line_start) * sizeof(CodeUnit));
160        if (NormalizeLineBreaksFlag) {
161            resultStrs[fileIdx] << '\n';  // terminate it
162        }
163    } else {
164        const auto end_byte = buffer[line_end];
165        if (grep::NormalizeLineBreaksFlag) {
166            if (LLVM_UNLIKELY(end_byte == 0x85)) {
167                // Line terminated with NEL, on the second byte.  Back up 1.
168                line_end -= 1;
169            } else if (LLVM_UNLIKELY(end_byte > 0xD)) {
170                // Line terminated with PS or LS, on the third byte.  Back up 2.
171                line_end -= 2;
172            }
173            resultStrs[fileIdx].write((char *)&buffer[line_start], (line_end - line_start) * sizeof(CodeUnit));
174            resultStrs[fileIdx] << '\n';
175        } else {
176            if (end_byte == 0x0D) {
177                // Check for line_end on first byte of CRLF; we don't want to access past the end of buffer.
178                if ((line_end + 1) < filesize) {
179                    if (buffer[line_end + 1] == 0x0A) {
180                        // Found CRLF; preserve both bytes.
181                        ++line_end;
182                    }
183                }
184            }
185            resultStrs[fileIdx].write((char *)&buffer[line_start], (line_end - line_start + 1) * sizeof(CodeUnit));
186        }
187    }
188}
189
190void PrintResult(GrepModeType grepMode, std::vector<size_t> & total_CountOnly){
191    if (grepMode == NormalMode) {
192        int returnCode = MatchNotFoundExitCode;
193        for (unsigned i = 0; i < inputFiles.size(); ++i){
194            std::cout << resultStrs[i].str();
195            if (!resultStrs[i].str().empty()) returnCode = MatchFoundExitCode;
196        }
197        exit(returnCode);
198    }
199    if (grepMode == CountOnly) {
200        size_t total = 0;
201        if (!WithFilenameFlag) {
202            for (unsigned i = 0; i < inputFiles.size(); ++i) {
203                std::cout << total_CountOnly[i] << std::endl;
204                total += total_CountOnly[i];
205            }
206        } else {
207            for (unsigned i = 0; i < inputFiles.size(); ++i){
208                std::cout << inputFiles[i] << ':' << total_CountOnly[i] << std::endl;
209                total += total_CountOnly[i];
210            };
211        }
212        exit(total == 0 ? MatchNotFoundExitCode : MatchFoundExitCode);
213    }
214    else if (grepMode == FilesWithMatch || grepMode == FilesWithoutMatch ) {
215        size_t total = 0;
216        size_t requiredCount = grepMode == FilesWithMatch ? 1 : 0;
217        for (unsigned i = 0; i < inputFiles.size(); ++i) {
218            if (total_CountOnly[i] == requiredCount) {
219                std::cout << inputFiles[i] << std::endl;
220            }
221            total += total_CountOnly[i];
222        }
223        exit(total == 0 ? MatchNotFoundExitCode : MatchFoundExitCode);
224    } else /* QuietMode */ {
225        for (unsigned i = 0; i < inputFiles.size(); ++i){
226            if (total_CountOnly[i] > 0) exit(MatchFoundExitCode);
227        }
228        exit(MatchNotFoundExitCode);
229    }
230}
231
232void GrepEngine::grepCodeGen_nvptx(std::vector<re::RE *> REs, const GrepModeType grepMode, const bool UTF_16) {
233
234    assert (mGrepDriver == nullptr);
235
236    mGrepDriver = new NVPTXDriver("engine");
237    auto & idb = mGrepDriver->getBuilder();
238    Module * M = idb->getModule();
239
240    const unsigned segmentSize = codegen::SegmentSize;
241    const unsigned bufferSegments = codegen::BufferSegments * codegen::ThreadNum;
242    const unsigned encodingBits = UTF_16 ? 16 : 8;
243
244    Type * const int64Ty = idb->getInt64Ty();
245    Type * const int32Ty = idb->getInt32Ty();
246    Type * const size_ty = idb->getSizeTy();
247    Type * const sizeTyPtr = PointerType::get(size_ty, 1);
248    Type * const int64tyPtr = PointerType::get(int64Ty, 1);
249    Type * const voidTy = idb->getVoidTy();
250
251    Function * mainFunc = cast<Function>(M->getOrInsertFunction("Main", voidTy, int64tyPtr, sizeTyPtr, sizeTyPtr, int64tyPtr, nullptr));
252    mainFunc->setCallingConv(CallingConv::C);
253    idb->SetInsertPoint(BasicBlock::Create(M->getContext(), "entry", mainFunc, 0));
254    auto args = mainFunc->arg_begin();
255
256    Value * const inputPtr = &*(args++);
257    inputPtr->setName("inputPtr");
258    Value * const startPointsPtr = &*(args++);
259    startPointsPtr->setName("startPointsPtr");
260    Value * const bufferSizesPtr = &*(args++);
261    bufferSizesPtr->setName("bufferSizesPtr");
262    Value * const outputPtr = &*(args++);
263    outputPtr->setName("outputPtr");
264
265    Function * tidFunc = M->getFunction("llvm.nvvm.read.ptx.sreg.tid.x");
266    Value * tid = idb->CreateCall(tidFunc);
267    Function * bidFunc = cast<Function>(M->getOrInsertFunction("llvm.nvvm.read.ptx.sreg.ctaid.x", int32Ty, nullptr));
268    Value * bid = idb->CreateCall(bidFunc);
269
270    Value * startPoint = idb->CreateLoad(idb->CreateGEP(startPointsPtr, bid));
271    Value * startBlock = idb->CreateUDiv(startPoint, ConstantInt::get(int64Ty, idb->getBitBlockWidth()));
272    Type * const inputStreamType = PointerType::get(ArrayType::get(ArrayType::get(idb->getBitBlockType(), 8), 1), 1);   
273    Value * inputStreamPtr = idb->CreateGEP(idb->CreateBitCast(inputPtr, inputStreamType), startBlock);
274    Value * inputStream = idb->CreateGEP(inputStreamPtr, tid);
275    Value * bufferSize = idb->CreateLoad(idb->CreateGEP(bufferSizesPtr, bid));
276
277    StreamSetBuffer * ByteStream = mGrepDriver->addBuffer(make_unique<SourceBuffer>(idb, idb->getStreamSetTy(1, 8), 1));
278    kernel::Kernel * sourceK = mGrepDriver->addKernelInstance(make_unique<kernel::MemorySourceKernel>(idb, inputStreamType, segmentSize));
279    sourceK->setInitialArguments({inputStream, bufferSize});
280    mGrepDriver->makeKernelCall(sourceK, {}, {ByteStream});
281
282    StreamSetBuffer * BasisBits = mGrepDriver->addBuffer(make_unique<CircularBuffer>(idb, idb->getStreamSetTy(8, 1), segmentSize * bufferSegments));
283    kernel::Kernel * s2pk = mGrepDriver->addKernelInstance(make_unique<kernel::S2PKernel>(idb));
284    mGrepDriver->makeKernelCall(s2pk, {ByteStream}, {BasisBits});
285 
286    StreamSetBuffer * LineBreakStream = mGrepDriver->addBuffer(make_unique<CircularBuffer>(idb, idb->getStreamSetTy(1, 1), segmentSize * bufferSegments));
287    kernel::Kernel * linebreakK = mGrepDriver->addKernelInstance(make_unique<kernel::LineBreakKernelBuilder>(idb, encodingBits));
288    mGrepDriver->makeKernelCall(linebreakK, {BasisBits}, {LineBreakStream});
289   
290    const auto n = REs.size();
291
292    std::vector<StreamSetBuffer *> MatchResultsBufs(n);
293
294    for(unsigned i = 0; i < n; ++i){
295        StreamSetBuffer * MatchResults = mGrepDriver->addBuffer(make_unique<CircularBuffer>(idb, idb->getStreamSetTy(1, 1), segmentSize * bufferSegments));
296        kernel::Kernel * icgrepK = mGrepDriver->addKernelInstance(make_unique<kernel::ICGrepKernel>(idb, REs[i]));
297        mGrepDriver->makeKernelCall(icgrepK, {BasisBits, LineBreakStream}, {MatchResults});
298        MatchResultsBufs[i] = MatchResults;
299    }
300    StreamSetBuffer * MergedResults = MatchResultsBufs[0];
301    if (REs.size() > 1) {
302        MergedResults = mGrepDriver->addBuffer(make_unique<CircularBuffer>(idb, idb->getStreamSetTy(1, 1), segmentSize * bufferSegments));
303        kernel::Kernel * streamsMergeK = mGrepDriver->addKernelInstance(make_unique<kernel::StreamsMerge>(idb, 1, REs.size()));
304        mGrepDriver->makeKernelCall(streamsMergeK, MatchResultsBufs, {MergedResults});
305    }
306
307    kernel::MatchCount matchCountK(idb);
308    mGrepDriver->addKernelCall(matchCountK, {MergedResults}, {});
309    mGrepDriver->generatePipelineIR();
310
311    idb->setKernel(&matchCountK);
312    Value * matchedLineCount = idb->getScalarField("matchedLineCount");
313    matchedLineCount = idb->CreateZExt(matchedLineCount, int64Ty);
314   
315    Value * strideBlocks = ConstantInt::get(int32Ty, idb->getStride() / idb->getBitBlockWidth());
316    Value * outputThreadPtr = idb->CreateGEP(outputPtr, idb->CreateAdd(idb->CreateMul(bid, strideBlocks), tid));
317    idb->CreateStore(matchedLineCount, outputThreadPtr);
318    idb->CreateRetVoid();
319
320    mGrepDriver->finalizeObject();
321}
322
323void GrepEngine::grepCodeGen(std::vector<re::RE *> REs, const GrepModeType grepMode, const bool UTF_16, GrepSource grepSource) {
324
325    assert (mGrepDriver == nullptr);
326    mGrepDriver = new ParabixDriver("engine");
327    auto & idb = mGrepDriver->getBuilder();
328    Module * M = idb->getModule();
329
330    const unsigned segmentSize = codegen::SegmentSize;
331    const unsigned bufferSegments = codegen::BufferSegments * codegen::ThreadNum;
332    const unsigned encodingBits = UTF_16 ? 16 : 8;
333
334    Type * const int64Ty = idb->getInt64Ty();
335    Type * const int32Ty = idb->getInt32Ty();
336
337    Function * mainFunc = nullptr;
338    Value * fileIdx = nullptr;
339    StreamSetBuffer * ByteStream = nullptr;
340    kernel::Kernel * sourceK = nullptr;
341   
342    size_t MatchLimit = ((grepMode == QuietMode) | (grepMode == FilesWithMatch) | (grepMode == FilesWithoutMatch)) ? 1 : MaxCountFlag;
343
344    if (grepSource == GrepSource::Internal) {
345
346        mainFunc = cast<Function>(M->getOrInsertFunction("Main", int64Ty, idb->getInt8PtrTy(), int64Ty, int32Ty, nullptr));
347        mainFunc->setCallingConv(CallingConv::C);
348        idb->SetInsertPoint(BasicBlock::Create(M->getContext(), "entry", mainFunc, 0));
349        auto args = mainFunc->arg_begin();
350
351        Value * const buffer = &*(args++);
352        buffer->setName("buffer");
353
354        Value * length = &*(args++);
355        length->setName("length");
356        length = idb->CreateZExtOrTrunc(length, idb->getSizeTy());
357
358        fileIdx = &*(args++);
359        fileIdx->setName("fileIdx");
360
361        ByteStream = mGrepDriver->addBuffer(make_unique<SourceBuffer>(idb, idb->getStreamSetTy(1, 8)));
362
363        sourceK = mGrepDriver->addKernelInstance(make_unique<kernel::MemorySourceKernel>(idb, idb->getInt8PtrTy(), segmentSize));
364        sourceK->setInitialArguments({buffer, length});
365
366    } else {
367
368        mainFunc = cast<Function>(M->getOrInsertFunction("Main", int64Ty, idb->getInt32Ty(), int32Ty, nullptr));
369        mainFunc->setCallingConv(CallingConv::C);
370        idb->SetInsertPoint(BasicBlock::Create(M->getContext(), "entry", mainFunc, 0));
371        auto args = mainFunc->arg_begin();
372
373        Value * const fileDescriptor = &*(args++);
374        fileDescriptor->setName("fileDescriptor");
375        fileIdx = &*(args++);
376        fileIdx->setName("fileIdx");
377
378        ByteStream = mGrepDriver->addBuffer(make_unique<SourceBuffer>(idb, idb->getStreamSetTy(1, 8)));
379
380        if (grepSource == GrepSource::File) {
381            sourceK = mGrepDriver->addKernelInstance(make_unique<kernel::MMapSourceKernel>(idb, segmentSize));
382            sourceK->setInitialArguments({fileDescriptor});
383        } else { // if (grepSource == GrepSource::StdIn) {
384            sourceK = mGrepDriver->addKernelInstance(make_unique<kernel::ReadSourceKernel>(idb, segmentSize));
385            sourceK->setInitialArguments({idb->getInt32(STDIN_FILENO)});
386        }
387    }
388
389    mGrepDriver->makeKernelCall(sourceK, {}, {ByteStream});
390    StreamSetBuffer * BasisBits = mGrepDriver->addBuffer(make_unique<CircularBuffer>(idb, idb->getStreamSetTy(8, 1), segmentSize * bufferSegments));
391   
392    kernel::Kernel * s2pk = mGrepDriver->addKernelInstance(make_unique<kernel::S2PKernel>(idb));
393    mGrepDriver->makeKernelCall(s2pk, {ByteStream}, {BasisBits});
394   
395    kernel::Kernel * linebreakK = mGrepDriver->addKernelInstance(make_unique<kernel::LineBreakKernelBuilder>(idb, encodingBits));
396    StreamSetBuffer * LineBreakStream = mGrepDriver->addBuffer(make_unique<CircularBuffer>(idb, idb->getStreamSetTy(1, 1), segmentSize * bufferSegments));
397    mGrepDriver->makeKernelCall(linebreakK, {BasisBits}, {LineBreakStream});
398   
399    const auto n = REs.size();
400
401    std::vector<StreamSetBuffer *> MatchResultsBufs(n);
402
403    for(unsigned i = 0; i < n; ++i){
404        StreamSetBuffer * MatchResults = mGrepDriver->addBuffer(make_unique<CircularBuffer>(idb, idb->getStreamSetTy(1, 1), segmentSize * bufferSegments));
405        kernel::Kernel * icgrepK = mGrepDriver->addKernelInstance(make_unique<kernel::ICGrepKernel>(idb, REs[i]));
406        mGrepDriver->makeKernelCall(icgrepK, {BasisBits, LineBreakStream}, {MatchResults});
407        MatchResultsBufs[i] = MatchResults;
408    }
409    StreamSetBuffer * MergedResults = MatchResultsBufs[0];
410    if (REs.size() > 1) {
411        MergedResults = mGrepDriver->addBuffer(make_unique<CircularBuffer>(idb, idb->getStreamSetTy(1, 1), segmentSize * bufferSegments));
412        kernel::Kernel * streamsMergeK = mGrepDriver->addKernelInstance(make_unique<kernel::StreamsMerge>(idb, 1, REs.size()));
413        mGrepDriver->makeKernelCall(streamsMergeK, MatchResultsBufs, {MergedResults});
414    }
415   
416    if (InvertMatchFlag) {
417        kernel::Kernel * invertK = mGrepDriver->addKernelInstance(make_unique<kernel::InvertMatchesKernel>(idb));
418        StreamSetBuffer * OriginalMatches = MergedResults;
419        MergedResults = mGrepDriver->addBuffer(make_unique<CircularBuffer>(idb, idb->getStreamSetTy(1, 1), segmentSize * bufferSegments));
420        mGrepDriver->makeKernelCall(invertK, {OriginalMatches, LineBreakStream}, {MergedResults});
421    }
422    if (MatchLimit > 0) {
423        kernel::Kernel * untilK = mGrepDriver->addKernelInstance(make_unique<kernel::UntilNkernel>(idb));
424        untilK->setInitialArguments({idb->getSize(MatchLimit)});
425        StreamSetBuffer * AllMatches = MergedResults;
426        MergedResults = mGrepDriver->addBuffer(make_unique<CircularBuffer>(idb, idb->getStreamSetTy(1, 1), segmentSize * bufferSegments));
427        mGrepDriver->makeKernelCall(untilK, {AllMatches}, {MergedResults});
428    }
429    if (grepMode != NormalMode) {
430        kernel::Kernel * matchCountK = mGrepDriver->addKernelInstance(make_unique<kernel::MatchCount>(idb));
431        mGrepDriver->makeKernelCall(matchCountK, {MergedResults}, {});
432        mGrepDriver->generatePipelineIR();
433        idb->setKernel(matchCountK);
434        Value * matchedLineCount = idb->getScalarField("matchedLineCount");
435        matchedLineCount = idb->CreateZExt(matchedLineCount, int64Ty);
436        idb->CreateRet(matchedLineCount);
437    } else {
438        kernel::Kernel * scanMatchK = mGrepDriver->addKernelInstance(make_unique<kernel::ScanMatchKernel>(idb, GrepType::Normal, encodingBits));
439        scanMatchK->setInitialArguments({fileIdx});
440        mGrepDriver->makeKernelCall(scanMatchK, {MergedResults, LineBreakStream, ByteStream}, {});
441        if (UTF_16) {
442            mGrepDriver->LinkFunction(*scanMatchK, "matcher", &wrapped_report_match<uint16_t>);
443        } else {
444            mGrepDriver->LinkFunction(*scanMatchK, "matcher", &wrapped_report_match<uint8_t>);
445        }
446        mGrepDriver->generatePipelineIR();
447        idb->CreateRet(idb->getInt64(0));
448    }
449    mGrepDriver->finalizeObject();
450}
451
452GrepEngine::GrepEngine()
453: mGrepDriver(nullptr) {
454
455}
456
457GrepEngine::~GrepEngine() {
458    delete mGrepDriver;
459}
460
461
462   
463static re::CC * parsedCodePointSet = nullptr;
464
465void insert_codepoints(const size_t lineNum, const size_t line_start, const size_t line_end, const char * const buffer) {
466    assert (buffer);
467    assert (line_start <= line_end);
468    re::codepoint_t c = 0;
469    size_t line_pos = line_start;
470    while (isxdigit(buffer[line_pos])) {
471        assert (line_pos < line_end);
472        if (isdigit(buffer[line_pos])) {
473            c = (c << 4) | (buffer[line_pos] - '0');
474        }
475        else {
476            c = (c << 4) | (tolower(buffer[line_pos]) - 'a' + 10);
477        }
478        line_pos++;
479    }
480    assert(((line_pos - line_start) >= 4) && ((line_pos - line_start) <= 6)); // UCD format 4 to 6 hex digits.
481    parsedCodePointSet->insert(c);
482}
483
484re::CC * grepCodepoints(re::RE * pattern, char * UnicodeDataBuffer, size_t bufferLength) {
485    parsedCodePointSet = re::makeCC();       
486    const unsigned segmentSize = 8;
487   
488    ParabixDriver pxDriver("codepointEngine");
489    auto & idb = pxDriver.getBuilder();
490    Module * M = idb->getModule();
491   
492    Function * mainFunc = cast<Function>(M->getOrInsertFunction("Main", idb->getVoidTy(), idb->getInt8PtrTy(), idb->getSizeTy(), nullptr));
493    mainFunc->setCallingConv(CallingConv::C);
494    auto args = mainFunc->arg_begin();
495    Value * const buffer = &*(args++);
496    buffer->setName("buffer");
497    Value * length = &*(args++);
498    length->setName("length");
499   
500    idb->SetInsertPoint(BasicBlock::Create(M->getContext(), "entry", mainFunc, 0));
501   
502    StreamSetBuffer * ByteStream = pxDriver.addBuffer(make_unique<SourceBuffer>(idb, idb->getStreamSetTy(1, 8)));
503    kernel::Kernel * sourceK = pxDriver.addKernelInstance(make_unique<kernel::MemorySourceKernel>(idb, idb->getInt8PtrTy(), segmentSize));
504    sourceK->setInitialArguments({buffer, length});
505    pxDriver.makeKernelCall(sourceK, {}, {ByteStream});
506   
507    StreamSetBuffer * BasisBits = pxDriver.addBuffer(make_unique<CircularBuffer>(idb, idb->getStreamSetTy(8, 1), segmentSize));
508   
509    kernel::Kernel * s2pk = pxDriver.addKernelInstance(make_unique<kernel::S2PKernel>(idb));
510    pxDriver.makeKernelCall(s2pk, {ByteStream}, {BasisBits});
511   
512    kernel::Kernel * linebreakK = pxDriver.addKernelInstance(make_unique<kernel::LineBreakKernelBuilder>(idb, 8));
513    StreamSetBuffer * LineBreakStream = pxDriver.addBuffer(make_unique<CircularBuffer>(idb, idb->getStreamSetTy(1, 1), segmentSize));
514    pxDriver.makeKernelCall(linebreakK, {BasisBits}, {LineBreakStream});
515   
516    StreamSetBuffer * MatchResults = pxDriver.addBuffer(make_unique<CircularBuffer>(idb, idb->getStreamSetTy(1, 1), segmentSize));
517    kernel::Kernel * icgrepK = pxDriver.addKernelInstance(make_unique<kernel::ICGrepKernel>(idb, pattern));
518    pxDriver.makeKernelCall(icgrepK, {BasisBits, LineBreakStream}, {MatchResults});
519   
520    kernel::Kernel * scanMatchK = pxDriver.addKernelInstance(make_unique<kernel::ScanMatchKernel>(idb, GrepType::NameExpression, 8));
521    scanMatchK->setInitialArguments({idb->getInt32(0)});
522    pxDriver.makeKernelCall(scanMatchK, {MatchResults, LineBreakStream, ByteStream}, {});
523    pxDriver.LinkFunction(*scanMatchK, "matcher", &insert_codepoints);
524    pxDriver.generatePipelineIR();
525    idb->CreateRetVoid();
526    pxDriver.finalizeObject();
527   
528    typedef void (*GrepFunctionType)(const char * buffer, const size_t length);
529    auto f = reinterpret_cast<GrepFunctionType>(pxDriver.getMain());
530    f(UnicodeDataBuffer, bufferLength);
531   
532    return parsedCodePointSet;   
533}
534
535   
// Lines collected by insert_property_values; cleared and returned by
// grepPropertyValues.
static std::vector<std::string> parsedPropertyValues;

/**
 * Match callback used by grepPropertyValues: appends the bytes of the matched
 * line, i.e. the half-open range [line_start, line_end) of buffer, to
 * parsedPropertyValues.
 *
 * @param lineNum     line number of the match (unused).
 * @param line_start  index of the first byte of the line.
 * @param line_end    index one past the last byte of the line.
 * @param buffer      start of the data being searched.
 */
void insert_property_values(size_t lineNum, size_t line_start, size_t line_end, const char * buffer) {
    assert (line_start <= line_end);
    const size_t lineLength = line_end - line_start;
    parsedPropertyValues.emplace_back(buffer + line_start, lineLength);
}
542
543
544const std::vector<std::string> & grepPropertyValues(const std::string& propertyName, re::RE * propertyValuePattern) {
545    enum { MaxSupportedVectorWidthInBytes = 32 };
546    AlignedAllocator<char, MaxSupportedVectorWidthInBytes> alloc;
547    parsedPropertyValues.clear();
548    const std::string & str = UCD::getPropertyValueGrepString(propertyName);
549    const auto n = str.length();
550    // NOTE: MaxSupportedVectorWidthInBytes of trailing 0s are needed to prevent the grep function from
551    // erroneously matching garbage data when loading the final partial block.
552    char * aligned = alloc.allocate(n + MaxSupportedVectorWidthInBytes, 0);
553    std::memcpy(aligned, str.data(), n);
554    std::memset(aligned + n, 0, MaxSupportedVectorWidthInBytes);
555   
556    const unsigned segmentSize = 8;
557   
558    ParabixDriver pxDriver("propertyValueEngine");
559    auto & idb = pxDriver.getBuilder();
560    Module * M = idb->getModule();
561   
562    Function * mainFunc = cast<Function>(M->getOrInsertFunction("Main", idb->getVoidTy(), idb->getInt8PtrTy(), idb->getSizeTy(), nullptr));
563    mainFunc->setCallingConv(CallingConv::C);
564    auto args = mainFunc->arg_begin();
565    Value * const buffer = &*(args++);
566    buffer->setName("buffer");
567    Value * length = &*(args++);
568    length->setName("length");
569   
570    idb->SetInsertPoint(BasicBlock::Create(M->getContext(), "entry", mainFunc, 0));
571   
572    StreamSetBuffer * ByteStream = pxDriver.addBuffer(make_unique<SourceBuffer>(idb, idb->getStreamSetTy(1, 8)));
573    kernel::Kernel * sourceK = pxDriver.addKernelInstance(make_unique<kernel::MemorySourceKernel>(idb, idb->getInt8PtrTy(), segmentSize));
574    sourceK->setInitialArguments({buffer, length});
575    pxDriver.makeKernelCall(sourceK, {}, {ByteStream});
576   
577    StreamSetBuffer * BasisBits = pxDriver.addBuffer(make_unique<CircularBuffer>(idb, idb->getStreamSetTy(8, 1), segmentSize));
578   
579    kernel::Kernel * s2pk = pxDriver.addKernelInstance(make_unique<kernel::S2PKernel>(idb));
580    pxDriver.makeKernelCall(s2pk, {ByteStream}, {BasisBits});
581   
582    kernel::Kernel * linebreakK = pxDriver.addKernelInstance(make_unique<kernel::LineBreakKernelBuilder>(idb, 8));
583    StreamSetBuffer * LineBreakStream = pxDriver.addBuffer(make_unique<CircularBuffer>(idb, idb->getStreamSetTy(1, 1), segmentSize));
584    pxDriver.makeKernelCall(linebreakK, {BasisBits}, {LineBreakStream});
585   
586    StreamSetBuffer * MatchResults = pxDriver.addBuffer(make_unique<CircularBuffer>(idb, idb->getStreamSetTy(1, 1), segmentSize));
587    kernel::Kernel * icgrepK = pxDriver.addKernelInstance(make_unique<kernel::ICGrepKernel>(idb, propertyValuePattern));
588    pxDriver.makeKernelCall(icgrepK, {BasisBits, LineBreakStream}, {MatchResults});
589   
590    kernel::Kernel * scanMatchK = pxDriver.addKernelInstance(make_unique<kernel::ScanMatchKernel>(idb, GrepType::PropertyValue, 8));
591    scanMatchK->setInitialArguments({idb->getInt32(0)});
592    pxDriver.makeKernelCall(scanMatchK, {MatchResults, LineBreakStream, ByteStream}, {});
593    pxDriver.LinkFunction(*scanMatchK, "matcher", &insert_property_values);
594    pxDriver.generatePipelineIR();
595    idb->CreateRetVoid();
596    pxDriver.finalizeObject();
597   
598    typedef void (*GrepFunctionType)(const char * buffer, const size_t length);
599    auto f = reinterpret_cast<GrepFunctionType>(pxDriver.getMain());
600    f(aligned, n);
601   
602    alloc.deallocate(aligned, 0);
603    return parsedPropertyValues;
604}
605
606   
607}
Note: See TracBrowser for help on using the repository browser.