source: icGREP/icgrep-devel/icgrep/grep_engine.cpp @ 4969

Last change on this file since 4969 was 4969, checked in by lindanl, 3 years ago

Bug fixed.

File size: 5.6 KB
Line 
1/*
2 *  Copyright (c) 2016 International Characters.
3 *  This software is licensed to the public under the Open Software License 3.0.
4 *  icgrep is a trademark of International Characters.
5 */
6
7#include <grep_engine.h>
8#include <toolchain.h>
9#include <utf_encoding.h>
10#include <pablo/pablo_compiler.h>
11#include <kernels/pipeline.h>
12#include <llvm/IR/Function.h>
13#include <llvm/IR/Type.h>
14#include <llvm/IR/Module.h>
15#include <llvm/ExecutionEngine/MCJIT.h>
16#include <llvm/IRReader/IRReader.h>
17#include <llvm/Support/Debug.h>
18#include <llvm/IR/Verifier.h>
19#include <UCD/UnicodeNameData.h>
20
21#include <fstream>
22#include <sstream>
23#include <iostream>
24#include <string>
25#include <stdint.h>
26
27#include <stdio.h>
28#include <stdlib.h>
29#include <unistd.h>
30#include <errno.h>
31#include <sys/types.h>
32#include <sys/stat.h>
33#include <stdexcept>
34#include <cctype>
35
36
37#include <llvm/Support/raw_os_ostream.h>
38
39// mmap system
40#ifdef USE_BOOST_MMAP
41#include <boost/filesystem.hpp>
42#include <boost/iostreams/device/mapped_file.hpp>
43using namespace boost::iostreams;
44using namespace boost::filesystem;
45#else
46#include <sys/mman.h>
47#endif
48#include <fcntl.h>
49
50#include <kernels/kernel.h>
51
52
53
54bool GrepEngine::finalLineIsUnterminated(char * mFileBuffer, size_t mFileSize) const {
55    if (mFileSize == 0) return false;
56    unsigned char end_byte = static_cast<unsigned char>(mFileBuffer[mFileSize-1]);
57    // LF through CR are line break characters
58    if ((end_byte >= 0xA) && (end_byte <= 0xD)) return false;
59    // Other line breaks require at least two bytes.
60    if (mFileSize == 1) return true;
61    // NEL
62    unsigned char penult_byte = static_cast<unsigned char>(mFileBuffer[mFileSize-2]);
63    if ((end_byte == 0x85) && (penult_byte == 0xC2)) return false;
64    if (mFileSize == 2) return true;
65    // LS and PS
66    if ((end_byte < 0xA8) || (end_byte > 0xA9)) return true;
67    return (static_cast<unsigned char>(mFileBuffer[mFileSize-3]) != 0xE2) || (penult_byte != 0x80);
68}
69
70void GrepEngine::doGrep(const std::string & fileName) {
71    std::string mFileName = fileName;
72    size_t mFileSize;
73    char * mFileBuffer;
74
75#ifdef USE_BOOST_MMAP
76    const path file(mFileName);
77    if (exists(file)) {
78        if (is_directory(file)) {
79            return;
80        }
81    } else {
82        std::cerr << "Error: cannot open " << mFileName << " for processing. Skipped.\n";
83        return;
84    }
85
86    mFileSize = file_size(file);
87    mapped_file mFile;
88    if (mFileSize == 0) {
89        mFileBuffer = nullptr;
90    }
91    else {
92        try {
93            mFile.open(mFileName, mapped_file::priv, mFileSize, 0);
94        } catch (std::ios_base::failure e) {
95            std::cerr << "Error: Boost mmap of " << mFileName << ": " << e.what() << std::endl;
96            return;
97        }
98        mFileBuffer = mFile.data();
99    }
100#else
101    struct stat infile_sb;
102    const int fdSrc = open(mFileName.c_str(), O_RDONLY);
103    if (fdSrc == -1) {
104        std::cerr << "Error: cannot open " << mFileName << " for processing. Skipped.\n";
105        return;
106    }
107    if (fstat(fdSrc, &infile_sb) == -1) {
108        std::cerr << "Error: cannot stat " << mFileName << " for processing. Skipped.\n";
109        close (fdSrc);
110        return;
111    }
112    if (S_ISDIR(infile_sb.st_mode)) {
113        close (fdSrc);
114        return;
115    }
116    mFileSize = infile_sb.st_size;
117    if (mFileSize == 0) {
118        mFileBuffer = nullptr;
119    }
120    else {
121        mFileBuffer = (char *) mmap(NULL, mFileSize, PROT_READ, MAP_PRIVATE, fdSrc, 0);
122        if (mFileBuffer == MAP_FAILED) {
123            if (errno ==  ENOMEM) {
124                std::cerr << "Error:  mmap of " << mFileName << " failed: out of memory\n";
125                close (fdSrc);
126            }
127            else {
128                std::cerr << "Error: mmap of " << mFileName << " failed with errno " << errno << ". Skipped.\n";
129                close (fdSrc);
130            }
131            return;
132        }
133    }
134    close(fdSrc);
135
136#endif
137   
138    uint64_t finalLineUnterminated = 0;
139    if(finalLineIsUnterminated(mFileBuffer, mFileSize))
140        finalLineUnterminated = 1;
141   
142    mMainFcn(mFileBuffer, mFileSize, mFileName.c_str(), finalLineUnterminated);
143
144#ifdef USE_BOOST_MMAP
145    mFile.close();
146#else
147    munmap((void *)mFileBuffer, mFileSize);
148#endif
149
150}
151
152
153void GrepEngine::grepCodeGen(std::string moduleName, re::RE * re_ast, bool isNameExpression) {
154                           
155    Module * M = new Module(moduleName, getGlobalContext());
156   
157    IDISA::IDISA_Builder * idb = GetIDISA_Builder(M);
158
159    PipelineBuilder pipelineBuilder(M, idb);
160
161    Encoding encoding(Encoding::Type::UTF_8, 8);
162    mIsNameExpression = isNameExpression;
163    re_ast = regular_expression_passes(encoding, re_ast);   
164    pablo::PabloFunction * function = re2pablo_compiler(encoding, re_ast);
165
166    pipelineBuilder.CreateKernels(function, isNameExpression);
167
168    pipelineBuilder.ExecuteKernels();
169
170    llvm::Function * main_IR = M->getFunction("Main");
171    mEngine = JIT_to_ExecutionEngine(M);
172   
173    icgrep_Linking(M, mEngine);
174    #ifndef NDEBUG
175    verifyModule(*M, &dbgs());
176    #endif
177    mEngine->finalizeObject();
178    delete idb;
179
180    mMainFcn = reinterpret_cast<GrepFunctionType>(mEngine->getPointerToFunction(main_IR));
181}
182
183re::CC *  GrepEngine::grepCodepoints() {
184    setParsedCodePointSet();
185    char * mFileBuffer = getUnicodeNameDataPtr();
186    size_t mFileSize = getUnicodeNameDataSize();
187    std::string mFileName = "Uname.txt";
188
189    uint64_t finalLineUnterminated = 0;
190    if(finalLineIsUnterminated(mFileBuffer, mFileSize))
191        finalLineUnterminated = 1;   
192    mMainFcn(mFileBuffer, mFileSize, mFileName.c_str(), finalLineUnterminated);
193
194    return getParsedCodePointSet();
195}
196
197GrepEngine::~GrepEngine() {
198    delete mEngine;
199}
Note: See TracBrowser for help on using the repository browser.