source: icGREP/icgrep-devel/icgrep/grep_engine.cpp @ 4947

Last change on this file since 4947 was 4947, checked in by cameron, 3 years ago

Restructuring step

File size: 5.3 KB
Line 
1/*
2 *  Copyright (c) 2016 International Characters.
3 *  This software is licensed to the public under the Open Software License 3.0.
4 *  icgrep is a trademark of International Characters.
5 */
6
7#include <grep_engine.h>
8#include <toolchain.h>
9#include <utf_encoding.h>
10#include <pablo/pablo_compiler.h>
11#include <kernels/pipeline.h>
12#include <llvm/IR/Function.h>
13#include <llvm/IR/Type.h>
14#include <llvm/IR/Module.h>
15#include <llvm/ExecutionEngine/MCJIT.h>
16#include <llvm/IRReader/IRReader.h>
17#include <llvm/Support/Debug.h>
18#include <llvm/IR/Verifier.h>
19
20#include <fstream>
21#include <sstream>
22#include <iostream>
23#include <string>
24#include <stdint.h>
25
26#include <stdio.h>
27#include <stdlib.h>
28#include <unistd.h>
29#include <errno.h>
30#include <sys/types.h>
31#include <sys/stat.h>
32#include <stdexcept>
33#include <cctype>
34
35
36#include <llvm/Support/raw_os_ostream.h>
37
38// mmap system
39#ifdef USE_BOOST_MMAP
40#include <boost/filesystem.hpp>
41#include <boost/iostreams/device/mapped_file.hpp>
42using namespace boost::iostreams;
43using namespace boost::filesystem;
44#else
45#include <sys/mman.h>
46#endif
47#include <fcntl.h>
48
49#include <kernels/kernel.h>
50
51
52
53bool GrepEngine::finalLineIsUnterminated() const {
54    if (mFileSize == 0) return false;
55    unsigned char end_byte = static_cast<unsigned char>(mFileBuffer[mFileSize-1]);
56    // LF through CR are line break characters
57    if ((end_byte >= 0xA) && (end_byte <= 0xD)) return false;
58    // Other line breaks require at least two bytes.
59    if (mFileSize == 1) return true;
60    // NEL
61    unsigned char penult_byte = static_cast<unsigned char>(mFileBuffer[mFileSize-2]);
62    if ((end_byte == 0x85) && (penult_byte == 0xC2)) return false;
63    if (mFileSize == 2) return true;
64    // LS and PS
65    if ((end_byte < 0xA8) || (end_byte > 0xA9)) return true;
66    return (static_cast<unsigned char>(mFileBuffer[mFileSize-3]) != 0xE2) || (penult_byte != 0x80);
67}
68
69bool GrepEngine::openMMap(const std::string & fileName) {
70
71    mFileName = fileName;
72
73#ifdef USE_BOOST_MMAP
74    const path file(mFileName);
75    if (exists(file)) {
76        if (is_directory(file)) {
77            return false;
78        }
79    } else {
80        std::cerr << "Error: cannot open " << mFileName << " for processing. Skipped.\n";
81        return false;
82    }
83
84    mFileSize = file_size(file);
85    mapped_file mFile;
86    if (mFileSize == 0) {
87        mFileBuffer = nullptr;
88    }
89    else {
90        try {
91            mFile.open(mFileName, mapped_file::priv, mFileSize, 0);
92        } catch (std::ios_base::failure e) {
93            std::cerr << "Error: Boost mmap of " << mFileName << ": " << e.what() << std::endl;
94            return false;
95        }
96        mFileBuffer = mFile.data();
97    }
98#else
99    struct stat infile_sb;
100    const int fdSrc = open(mFileName.c_str(), O_RDONLY);
101    if (fdSrc == -1) {
102        std::cerr << "Error: cannot open " << mFileName << " for processing. Skipped.\n";
103        return false;
104    }
105    if (fstat(fdSrc, &infile_sb) == -1) {
106        std::cerr << "Error: cannot stat " << mFileName << " for processing. Skipped.\n";
107        close (fdSrc);
108        return false;
109    }
110    if (S_ISDIR(infile_sb.st_mode)) {
111        close (fdSrc);
112        return false;
113    }
114    mFileSize = infile_sb.st_size;
115    if (mFileSize == 0) {
116        mFileBuffer = nullptr;
117    }
118    else {
119        mFileBuffer = (char *) mmap(NULL, mFileSize, PROT_READ, MAP_PRIVATE, fdSrc, 0);
120        if (mFileBuffer == MAP_FAILED) {
121            if (errno ==  ENOMEM) {
122                std::cerr << "Error:  mmap of " << mFileName << " failed: out of memory\n";
123                close (fdSrc);
124            }
125            else {
126                std::cerr << "Error: mmap of " << mFileName << " failed with errno " << errno << ". Skipped.\n";
127                close (fdSrc);
128            }
129            return false;
130        }
131    }
132    close(fdSrc);
133
134#endif
135    return true;  // success
136}
137
138
139void GrepEngine::doGrep() {
140       
141    llvm::raw_os_ostream out(std::cout);
142   
143    uint64_t finalLineUnterminated = 0;
144    if(finalLineIsUnterminated())
145        finalLineUnterminated = 1;
146   
147    mMainFcn(mFileBuffer, mFileSize, mFileName.c_str(), finalLineUnterminated);
148   
149    PrintTotalCount();
150   
151#ifdef USE_BOOST_MMAP
152    mFile.close();
153#else
154    munmap((void *)mFileBuffer, mFileSize);
155#endif   
156}
157
158
159void GrepEngine::grepCodeGen(std::string moduleName, re::RE * re_ast, bool isNameExpression) {
160                           
161    Module * M = new Module("moduleName", getGlobalContext());
162   
163    IDISA::IDISA_Builder * idb = GetNativeIDISA_Builder(M, VectorType::get(IntegerType::get(getGlobalContext(), 64), BLOCK_SIZE/64));
164
165    PipelineBuilder pipelineBuilder(M, idb);
166
167    Encoding encoding(Encoding::Type::UTF_8, 8);
168    re_ast = regular_expression_passes(encoding, re_ast);   
169    pablo::PabloFunction * function = re2pablo_compiler(encoding, re_ast);
170
171    pipelineBuilder.CreateKernels(function, isNameExpression);
172
173    pipelineBuilder.ExecuteKernels();
174
175    llvm::Function * main_IR = M->getFunction("Main");
176    mEngine = JIT_to_ExecutionEngine(M);
177   
178    icgrep_Linking(M, mEngine);
179    verifyModule(*M, &dbgs());
180    mEngine->finalizeObject();
181    delete idb;
182
183    mMainFcn = (main_fcn_T) mEngine->getPointerToFunction(main_IR);
184}
185
186
187re::CC *  GrepEngine::grepCodepoints() {
188    setParsedCodePointSet();
189    if (openMMap("../UName.txt")) {
190        doGrep();
191    }
192    return getParsedCodePointSet();
193}
Note: See TracBrowser for help on using the repository browser.