source: icGREP/icgrep-devel/icgrep/grep_engine.cpp @ 4952

Last change on this file since 4952 was 4952, checked in by cameron, 3 years ago

Removing compile-time BLOCK_SIZE from toolchain and grep_engine; enabling the BlockSize command-line parameter

File size: 5.4 KB
Line 
1/*
2 *  Copyright (c) 2016 International Characters.
3 *  This software is licensed to the public under the Open Software License 3.0.
4 *  icgrep is a trademark of International Characters.
5 */
6
7#include <grep_engine.h>
8#include <toolchain.h>
9#include <utf_encoding.h>
10#include <pablo/pablo_compiler.h>
11#include <kernels/pipeline.h>
12#include <llvm/IR/Function.h>
13#include <llvm/IR/Type.h>
14#include <llvm/IR/Module.h>
15#include <llvm/ExecutionEngine/MCJIT.h>
16#include <llvm/IRReader/IRReader.h>
17#include <llvm/Support/Debug.h>
18#include <llvm/IR/Verifier.h>
19#include <UCD/UnicodeNameData.h>
20
21#include <fstream>
22#include <sstream>
23#include <iostream>
24#include <string>
25#include <stdint.h>
26
27#include <stdio.h>
28#include <stdlib.h>
29#include <unistd.h>
30#include <errno.h>
31#include <sys/types.h>
32#include <sys/stat.h>
33#include <stdexcept>
34#include <cctype>
35
36
37#include <llvm/Support/raw_os_ostream.h>
38
39// mmap system
40#ifdef USE_BOOST_MMAP
41#include <boost/filesystem.hpp>
42#include <boost/iostreams/device/mapped_file.hpp>
43using namespace boost::iostreams;
44using namespace boost::filesystem;
45#else
46#include <sys/mman.h>
47#endif
48#include <fcntl.h>
49
50#include <kernels/kernel.h>
51
52
53
54bool GrepEngine::finalLineIsUnterminated() const {
55    if (mFileSize == 0) return false;
56    unsigned char end_byte = static_cast<unsigned char>(mFileBuffer[mFileSize-1]);
57    // LF through CR are line break characters
58    if ((end_byte >= 0xA) && (end_byte <= 0xD)) return false;
59    // Other line breaks require at least two bytes.
60    if (mFileSize == 1) return true;
61    // NEL
62    unsigned char penult_byte = static_cast<unsigned char>(mFileBuffer[mFileSize-2]);
63    if ((end_byte == 0x85) && (penult_byte == 0xC2)) return false;
64    if (mFileSize == 2) return true;
65    // LS and PS
66    if ((end_byte < 0xA8) || (end_byte > 0xA9)) return true;
67    return (static_cast<unsigned char>(mFileBuffer[mFileSize-3]) != 0xE2) || (penult_byte != 0x80);
68}
69
70bool GrepEngine::openMMap(const std::string & fileName) {
71
72    mFileName = fileName;
73
74#ifdef USE_BOOST_MMAP
75    const path file(mFileName);
76    if (exists(file)) {
77        if (is_directory(file)) {
78            return false;
79        }
80    } else {
81        std::cerr << "Error: cannot open " << mFileName << " for processing. Skipped.\n";
82        return false;
83    }
84
85    mFileSize = file_size(file);
86    mapped_file mFile;
87    if (mFileSize == 0) {
88        mFileBuffer = nullptr;
89    }
90    else {
91        try {
92            mFile.open(mFileName, mapped_file::priv, mFileSize, 0);
93        } catch (std::ios_base::failure e) {
94            std::cerr << "Error: Boost mmap of " << mFileName << ": " << e.what() << std::endl;
95            return false;
96        }
97        mFileBuffer = mFile.data();
98    }
99#else
100    struct stat infile_sb;
101    const int fdSrc = open(mFileName.c_str(), O_RDONLY);
102    if (fdSrc == -1) {
103        std::cerr << "Error: cannot open " << mFileName << " for processing. Skipped.\n";
104        return false;
105    }
106    if (fstat(fdSrc, &infile_sb) == -1) {
107        std::cerr << "Error: cannot stat " << mFileName << " for processing. Skipped.\n";
108        close (fdSrc);
109        return false;
110    }
111    if (S_ISDIR(infile_sb.st_mode)) {
112        close (fdSrc);
113        return false;
114    }
115    mFileSize = infile_sb.st_size;
116    if (mFileSize == 0) {
117        mFileBuffer = nullptr;
118    }
119    else {
120        mFileBuffer = (char *) mmap(NULL, mFileSize, PROT_READ, MAP_PRIVATE, fdSrc, 0);
121        if (mFileBuffer == MAP_FAILED) {
122            if (errno ==  ENOMEM) {
123                std::cerr << "Error:  mmap of " << mFileName << " failed: out of memory\n";
124                close (fdSrc);
125            }
126            else {
127                std::cerr << "Error: mmap of " << mFileName << " failed with errno " << errno << ". Skipped.\n";
128                close (fdSrc);
129            }
130            return false;
131        }
132    }
133    close(fdSrc);
134
135#endif
136    return true;  // success
137}
138
139void GrepEngine::closeMMap() {
140#ifdef USE_BOOST_MMAP
141    mFile.close();
142#else
143    munmap((void *)mFileBuffer, mFileSize);
144#endif   
145
146}
147
148void GrepEngine::doGrep() {
149       
150    llvm::raw_os_ostream out(std::cout);
151   
152    uint64_t finalLineUnterminated = 0;
153    if(finalLineIsUnterminated())
154        finalLineUnterminated = 1;
155   
156    mMainFcn(mFileBuffer, mFileSize, mFileName.c_str(), finalLineUnterminated);
157   
158    if (!mIsNameExpression) PrintTotalCount();
159}
160
161void GrepEngine::grepCodeGen(std::string moduleName, re::RE * re_ast, bool isNameExpression) {
162                           
163    Module * M = new Module("moduleName", getGlobalContext());
164   
165    IDISA::IDISA_Builder * idb = GetIDISA_Builder(M);
166
167    PipelineBuilder pipelineBuilder(M, idb);
168
169    Encoding encoding(Encoding::Type::UTF_8, 8);
170    mIsNameExpression = isNameExpression;
171    re_ast = regular_expression_passes(encoding, re_ast);   
172    pablo::PabloFunction * function = re2pablo_compiler(encoding, re_ast);
173
174    pipelineBuilder.CreateKernels(function, isNameExpression);
175
176    pipelineBuilder.ExecuteKernels();
177
178    llvm::Function * main_IR = M->getFunction("Main");
179    mEngine = JIT_to_ExecutionEngine(M);
180   
181    icgrep_Linking(M, mEngine);
182    verifyModule(*M, &dbgs());
183    mEngine->finalizeObject();
184    delete idb;
185
186    mMainFcn = (main_fcn_T) mEngine->getPointerToFunction(main_IR);
187}
188
189
190re::CC *  GrepEngine::grepCodepoints() {
191    setParsedCodePointSet();
192    mFileBuffer = getUnicodeNameDataPtr();
193    mFileSize = getUnicodeNameDataSize();
194    mFileName = "Uname.txt";
195    doGrep();
196    return getParsedCodePointSet();
197}
Note: See TracBrowser for help on using the repository browser.