source: icGREP/icgrep-devel/icgrep/grep_engine.cpp @ 4946

Last change on this file since 4946 was 4946, checked in by cameron, 3 years ago

Refactor and encapsulate grep codegen/execution in GrepEngine.

File size: 5.1 KB
Line 
1/*
2 *  Copyright (c) 2014 International Characters.
3 *  This software is licensed to the public under the Open Software License 3.0.
4 *  icgrep is a trademark of International Characters.
5 */
6
7#include <grep_engine.h>
8#include <toolchain.h>
9#include <utf_encoding.h>
10#include <pablo/pablo_compiler.h>
11#include <kernels/pipeline.h>
12#include <llvm/IR/Function.h>
13#include <llvm/IR/Type.h>
14#include <llvm/IR/Module.h>
15#include <llvm/ExecutionEngine/MCJIT.h>
16#include <llvm/IRReader/IRReader.h>
17#include <llvm/Support/Debug.h>
18#include <llvm/IR/Verifier.h>
19
20#include <fstream>
21#include <sstream>
22#include <iostream>
23#include <string>
24#include <stdint.h>
25
26#include <stdio.h>
27#include <stdlib.h>
28#include <unistd.h>
29#include <errno.h>
30#include <sys/types.h>
31#include <sys/stat.h>
32#include <stdexcept>
33#include <cctype>
34
35
36#include <llvm/Support/raw_os_ostream.h>
37
38// mmap system
39#ifdef USE_BOOST_MMAP
40#include <boost/filesystem.hpp>
41#include <boost/iostreams/device/mapped_file.hpp>
42using namespace boost::iostreams;
43using namespace boost::filesystem;
44#else
45#include <sys/mman.h>
46#endif
47#include <fcntl.h>
48
49#include <kernels/kernel.h>
50
51
52
53bool GrepEngine::finalLineIsUnterminated() const {
54    if (mFileSize == 0) return false;
55    unsigned char end_byte = static_cast<unsigned char>(mFileBuffer[mFileSize-1]);
56    // LF through CR are line break characters
57    if ((end_byte >= 0xA) && (end_byte <= 0xD)) return false;
58    // Other line breaks require at least two bytes.
59    if (mFileSize == 1) return true;
60    // NEL
61    unsigned char penult_byte = static_cast<unsigned char>(mFileBuffer[mFileSize-2]);
62    if ((end_byte == 0x85) && (penult_byte == 0xC2)) return false;
63    if (mFileSize == 2) return true;
64    // LS and PS
65    if ((end_byte < 0xA8) || (end_byte > 0xA9)) return true;
66    return (static_cast<unsigned char>(mFileBuffer[mFileSize-3]) != 0xE2) || (penult_byte != 0x80);
67}
68
69void GrepEngine::doGrep(const std::string & fileName) {
70
71    mFileName = fileName;
72
73#ifdef USE_BOOST_MMAP
74    const path file(mFileName);
75    if (exists(file)) {
76        if (is_directory(file)) {
77            return;
78        }
79    } else {
80        std::cerr << "Error: cannot open " << mFileName << " for processing. Skipped.\n";
81        return;
82    }
83
84    mFileSize = file_size(file);
85    mapped_file mFile;
86    if (mFileSize == 0) {
87        mFileBuffer = nullptr;
88    }
89    else {
90        try {
91            mFile.open(mFileName, mapped_file::priv, mFileSize, 0);
92        } catch (std::ios_base::failure e) {
93            std::cerr << "Error: Boost mmap of " << mFileName << ": " << e.what() << std::endl;
94            return;
95        }
96        mFileBuffer = mFile.data();
97    }
98#else
99    struct stat infile_sb;
100    const int fdSrc = open(mFileName.c_str(), O_RDONLY);
101    if (fdSrc == -1) {
102        std::cerr << "Error: cannot open " << mFileName << " for processing. Skipped.\n";
103        return;
104    }
105    if (fstat(fdSrc, &infile_sb) == -1) {
106        std::cerr << "Error: cannot stat " << mFileName << " for processing. Skipped.\n";
107        close (fdSrc);
108        return;
109    }
110    if (S_ISDIR(infile_sb.st_mode)) {
111        close (fdSrc);
112        return;
113    }
114    mFileSize = infile_sb.st_size;
115    if (mFileSize == 0) {
116        mFileBuffer = nullptr;
117    }
118    else {
119        mFileBuffer = (char *) mmap(NULL, mFileSize, PROT_READ, MAP_PRIVATE, fdSrc, 0);
120        if (mFileBuffer == MAP_FAILED) {
121            if (errno ==  ENOMEM) {
122                std::cerr << "Error:  mmap of " << mFileName << " failed: out of memory\n";
123                close (fdSrc);
124            }
125            else {
126                std::cerr << "Error: mmap of " << mFileName << " failed with errno " << errno << ". Skipped.\n";
127                close (fdSrc);
128            }
129            return;
130        }
131    }
132#endif
133
134    llvm::raw_os_ostream out(std::cout);
135
136    uint64_t finalLineUnterminated = 0;
137    if(finalLineIsUnterminated())
138        finalLineUnterminated = 1;
139
140    mMainFcn(mFileBuffer, mFileSize, fileName.c_str(), finalLineUnterminated);
141
142    PrintTotalCount();
143   
144#ifdef USE_BOOST_MMAP
145    mFile.close();
146#else
147    munmap((void *)mFileBuffer, mFileSize);
148    close(fdSrc);
149#endif   
150}
151
152void GrepEngine::grepCodeGen(std::string moduleName, re::RE * re_ast, bool isNameExpression) {
153                           
154    Module * M = new Module("moduleName", getGlobalContext());
155   
156    IDISA::IDISA_Builder * idb = GetNativeIDISA_Builder(M, VectorType::get(IntegerType::get(getGlobalContext(), 64), BLOCK_SIZE/64));
157
158    PipelineBuilder pipelineBuilder(M, idb);
159
160    Encoding encoding(Encoding::Type::UTF_8, 8);
161    re_ast = regular_expression_passes(encoding, re_ast);   
162    pablo::PabloFunction * function = re2pablo_compiler(encoding, re_ast);
163
164    pipelineBuilder.CreateKernels(function, isNameExpression);
165
166    pipelineBuilder.ExecuteKernels();
167
168    llvm::Function * main_IR = M->getFunction("Main");
169    mEngine = JIT_to_ExecutionEngine(M);
170   
171    icgrep_Linking(M, mEngine);
172    verifyModule(*M, &dbgs());
173    mEngine->finalizeObject();
174    delete idb;
175
176    mMainFcn = (main_fcn_T) mEngine->getPointerToFunction(main_IR);
177}
178
179
180re::CC *  GrepEngine::grepCodepoints(const std::string & UNameFile) {
181    setParsedCodePointSet();
182    doGrep(UNameFile);
183    return getParsedCodePointSet();
184}
Note: See TracBrowser for help on using the repository browser.