source: icGREP/icgrep-devel/icgrep/grep_engine.cpp @ 5025

Last change on this file since 5025 was 5025, checked in by xuedongx, 3 years ago

If '-c', ignore the scanmatch kernel.

File size: 4.7 KB
RevLine 
[4324]1/*
[4947]2 *  Copyright (c) 2016 International Characters.
[4324]3 *  This software is licensed to the public under the Open Software License 3.0.
4 *  icgrep is a trademark of International Characters.
5 */
6
[4946]7#include <grep_engine.h>
[4984]8#include <IDISA/idisa_builder.h>
9#include <IDISA/idisa_target.h>
10#include <re/re_toolchain.h>
11#include <pablo/pablo_toolchain.h>
[4946]12#include <toolchain.h>
13#include <utf_encoding.h>
14#include <pablo/pablo_compiler.h>
15#include <kernels/pipeline.h>
16#include <llvm/IR/Function.h>
17#include <llvm/IR/Type.h>
18#include <llvm/IR/Module.h>
19#include <llvm/ExecutionEngine/MCJIT.h>
20#include <llvm/IRReader/IRReader.h>
21#include <llvm/Support/Debug.h>
22#include <llvm/IR/Verifier.h>
[4949]23#include <UCD/UnicodeNameData.h>
[4324]24
25#include <fstream>
26#include <sstream>
27#include <iostream>
28#include <string>
29#include <stdint.h>
30
31#include <stdio.h>
32#include <stdlib.h>
33#include <unistd.h>
34#include <errno.h>
35#include <sys/types.h>
36#include <sys/stat.h>
[4430]37#include <stdexcept>
[4802]38#include <cctype>
[4324]39
40
[4788]41#include <llvm/Support/raw_os_ostream.h>
42
[4324]43// mmap system
[4788]44#include <boost/filesystem.hpp>
[4778]45#include <boost/iostreams/device/mapped_file.hpp>
[4788]46using namespace boost::iostreams;
47using namespace boost::filesystem;
[4974]48
[4324]49#include <fcntl.h>
50
[4946]51#include <kernels/kernel.h>
[4324]52
53
54
[5001]55bool GrepEngine::finalLineIsUnterminated(const char * const fileBuffer, const size_t fileSize) {
56    if (fileSize == 0) return false;
57    unsigned char end_byte = static_cast<unsigned char>(fileBuffer[fileSize-1]);
[4478]58    // LF through CR are line break characters
59    if ((end_byte >= 0xA) && (end_byte <= 0xD)) return false;
60    // Other line breaks require at least two bytes.
[5001]61    if (fileSize == 1) return true;
[4788]62    // NEL
[5001]63    unsigned char penult_byte = static_cast<unsigned char>(fileBuffer[fileSize-2]);
[4478]64    if ((end_byte == 0x85) && (penult_byte == 0xC2)) return false;
[5001]65    if (fileSize == 2) return true;
[4478]66    // LS and PS
67    if ((end_byte < 0xA8) || (end_byte > 0xA9)) return true;
[5001]68    return (static_cast<unsigned char>(fileBuffer[fileSize-3]) != 0xE2) || (penult_byte != 0x80);
[4478]69}
[4324]70
[5025]71void GrepEngine::doGrep(const std::string & fileName, const int fileIdx, bool CountOnly, std::vector<int> & total_CountOnly) {
[5001]72    const path file(fileName);
[4788]73    if (exists(file)) {
74        if (is_directory(file)) {
[4969]75            return;
[4788]76        }
77    } else {
[5001]78        std::cerr << "Error: cannot open " << fileName << " for processing. Skipped.\n";
[4969]79        return;
[4788]80    }
81
[5001]82    const size_t fileSize = file_size(file);
83    if (fileSize > 0) {
[5021]84        mapped_file_source file;
[4883]85        try {
[5021]86            file.open(fileName);
87        } catch (std::exception &e) {
[5001]88            throw std::runtime_error("Boost mmap error: " + fileName + ": " + e.what());
[4883]89        }
[5021]90        char * fileBuffer = const_cast<char *>(file.data());
[5025]91        if(CountOnly){
92            total_CountOnly[fileIdx] = mGrepFunction_CountOnly(fileBuffer, fileSize, fileIdx, finalLineIsUnterminated(fileBuffer, fileSize));
93        }
94        else{
95            mGrepFunction(fileBuffer, fileSize, fileIdx, finalLineIsUnterminated(fileBuffer, fileSize));
96        }
[5001]97        file.close();
[4883]98    }
[5021]99    else {
[5025]100        if(CountOnly) {
101            mGrepFunction_CountOnly(nullptr, 0, fileIdx, false);
102        }
103        else{
104            mGrepFunction(nullptr, 0, fileIdx, false);
105        }
[5021]106    }
[4949]107}
108
[4946]109
[5025]110void GrepEngine::grepCodeGen(std::string moduleName, re::RE * re_ast, bool CountOnly, bool isNameExpression) {
[4969]111    Module * M = new Module(moduleName, getGlobalContext());
[4946]112   
[4952]113    IDISA::IDISA_Builder * idb = GetIDISA_Builder(M);
[4946]114
[4974]115    kernel::PipelineBuilder pipelineBuilder(M, idb);
[4946]116
117    Encoding encoding(Encoding::Type::UTF_8, 8);
[4949]118    mIsNameExpression = isNameExpression;
[4946]119    re_ast = regular_expression_passes(encoding, re_ast);   
120    pablo::PabloFunction * function = re2pablo_compiler(encoding, re_ast);
[4984]121   
[4946]122
123    pipelineBuilder.CreateKernels(function, isNameExpression);
124
[5025]125    llvm::Function * grepIR = pipelineBuilder.ExecuteKernels(CountOnly);
[4946]126
127    mEngine = JIT_to_ExecutionEngine(M);
128   
129    icgrep_Linking(M, mEngine);
[4968]130    #ifndef NDEBUG
[4946]131    verifyModule(*M, &dbgs());
[4968]132    #endif
[4946]133    mEngine->finalizeObject();
134    delete idb;
135
[5025]136    if(CountOnly){
137        mGrepFunction_CountOnly = reinterpret_cast<GrepFunctionType_CountOnly>(mEngine->getPointerToFunction(grepIR));
138    }
139    else{
140        mGrepFunction = reinterpret_cast<GrepFunctionType>(mEngine->getPointerToFunction(grepIR));
141    }
142
[4946]143}
144
[4947]145re::CC *  GrepEngine::grepCodepoints() {
[4974]146
[4946]147    setParsedCodePointSet();
[4967]148    char * mFileBuffer = getUnicodeNameDataPtr();
[4969]149    size_t mFileSize = getUnicodeNameDataSize();
[4967]150    std::string mFileName = "Uname.txt";
151
152    uint64_t finalLineUnterminated = 0;
153    if(finalLineIsUnterminated(mFileBuffer, mFileSize))
154        finalLineUnterminated = 1;   
[5017]155    mGrepFunction(mFileBuffer, mFileSize, 0, finalLineUnterminated);
[4967]156
[4946]157    return getParsedCodePointSet();
158}
[4968]159
160GrepEngine::~GrepEngine() {
161    delete mEngine;
162}
Note: See TracBrowser for help on using the repository browser.