source: icGREP/icgrep-devel/icgrep/grep_engine.cpp @ 4986

Last change on this file since 4986 was 4986, checked in by nmedfort, 3 years ago

First attempt at dynamic segment size integration.

File size: 4.5 KB
Line 
1/*
2 *  Copyright (c) 2016 International Characters.
3 *  This software is licensed to the public under the Open Software License 3.0.
4 *  icgrep is a trademark of International Characters.
5 */
6
7#include <grep_engine.h>
8#include <IDISA/idisa_builder.h>
9#include <IDISA/idisa_target.h>
10#include <re/re_toolchain.h>
11#include <pablo/pablo_toolchain.h>
12#include <toolchain.h>
13#include <utf_encoding.h>
14#include <pablo/pablo_compiler.h>
15#include <kernels/pipeline.h>
16#include <llvm/IR/Function.h>
17#include <llvm/IR/Type.h>
18#include <llvm/IR/Module.h>
19#include <llvm/ExecutionEngine/MCJIT.h>
20#include <llvm/IRReader/IRReader.h>
21#include <llvm/Support/Debug.h>
22#include <llvm/IR/Verifier.h>
23#include <UCD/UnicodeNameData.h>
24
25#include <fstream>
26#include <sstream>
27#include <iostream>
28#include <string>
29#include <stdint.h>
30
31#include <stdio.h>
32#include <stdlib.h>
33#include <unistd.h>
34#include <errno.h>
35#include <sys/types.h>
36#include <sys/stat.h>
37#include <stdexcept>
38#include <cctype>
39
40
41#include <llvm/Support/raw_os_ostream.h>
42
43// mmap system
44#include <boost/filesystem.hpp>
45#include <boost/iostreams/device/mapped_file.hpp>
46using namespace boost::iostreams;
47using namespace boost::filesystem;
48
49#include <fcntl.h>
50
51#include <kernels/kernel.h>
52
53
54
55bool GrepEngine::finalLineIsUnterminated(char * mFileBuffer, size_t mFileSize) const {
56    if (mFileSize == 0) return false;
57    unsigned char end_byte = static_cast<unsigned char>(mFileBuffer[mFileSize-1]);
58    // LF through CR are line break characters
59    if ((end_byte >= 0xA) && (end_byte <= 0xD)) return false;
60    // Other line breaks require at least two bytes.
61    if (mFileSize == 1) return true;
62    // NEL
63    unsigned char penult_byte = static_cast<unsigned char>(mFileBuffer[mFileSize-2]);
64    if ((end_byte == 0x85) && (penult_byte == 0xC2)) return false;
65    if (mFileSize == 2) return true;
66    // LS and PS
67    if ((end_byte < 0xA8) || (end_byte > 0xA9)) return true;
68    return (static_cast<unsigned char>(mFileBuffer[mFileSize-3]) != 0xE2) || (penult_byte != 0x80);
69}
70
71void GrepEngine::doGrep(const std::string & fileName) {
72    std::string mFileName = fileName;
73    size_t mFileSize;
74    char * mFileBuffer;
75
76    const path file(mFileName);
77    if (exists(file)) {
78        if (is_directory(file)) {
79            return;
80        }
81    } else {
82        std::cerr << "Error: cannot open " << mFileName << " for processing. Skipped.\n";
83        return;
84    }
85
86    mFileSize = file_size(file);
87    mapped_file mFile;
88    if (mFileSize == 0) {
89        mFileBuffer = nullptr;
90    }
91    else {
92        try {
93            mFile.open(mFileName, mapped_file::priv, mFileSize, 0);
94        } catch (std::ios_base::failure e) {
95            std::cerr << "Error: Boost mmap of " << mFileName << ": " << e.what() << std::endl;
96            return;
97        }
98        mFileBuffer = mFile.data();
99    }
100   
101    uint64_t finalLineUnterminated = 0;
102    if(finalLineIsUnterminated(mFileBuffer, mFileSize))
103        finalLineUnterminated = 1;
104   
105    mGrepFunction(mFileBuffer, mFileSize, mFileName.c_str(), finalLineUnterminated);
106
107
108    mFile.close();
109
110}
111
112
113void GrepEngine::grepCodeGen(std::string moduleName, re::RE * re_ast, bool isNameExpression) {
114                           
115    Module * M = new Module(moduleName, getGlobalContext());
116   
117    IDISA::IDISA_Builder * idb = GetIDISA_Builder(M);
118
119    kernel::PipelineBuilder pipelineBuilder(M, idb);
120
121    Encoding encoding(Encoding::Type::UTF_8, 8);
122    mIsNameExpression = isNameExpression;
123    re_ast = regular_expression_passes(encoding, re_ast);   
124    pablo::PabloFunction * function = re2pablo_compiler(encoding, re_ast);
125   
126
127    pipelineBuilder.CreateKernels(function, isNameExpression);
128
129    llvm::Function * grepIR = pipelineBuilder.ExecuteKernels();
130
131    mEngine = JIT_to_ExecutionEngine(M);
132   
133    icgrep_Linking(M, mEngine);
134    #ifndef NDEBUG
135    verifyModule(*M, &dbgs());
136    #endif
137    mEngine->finalizeObject();
138    delete idb;
139
140    mGrepFunction = reinterpret_cast<GrepFunctionType>(mEngine->getPointerToFunction(grepIR));
141}
142
143re::CC *  GrepEngine::grepCodepoints() {
144
145    setParsedCodePointSet();
146    char * mFileBuffer = getUnicodeNameDataPtr();
147    size_t mFileSize = getUnicodeNameDataSize();
148    std::string mFileName = "Uname.txt";
149
150    uint64_t finalLineUnterminated = 0;
151    if(finalLineIsUnterminated(mFileBuffer, mFileSize))
152        finalLineUnterminated = 1;   
153    mGrepFunction(mFileBuffer, mFileSize, mFileName.c_str(), finalLineUnterminated);
154
155    return getParsedCodePointSet();
156}
157
158GrepEngine::~GrepEngine() {
159    delete mEngine;
160}
Note: See TracBrowser for help on using the repository browser.