source: icGREP/icgrep-devel/icgrep/do_grep.cpp @ 4939

Last change on this file since 4939 was 4939, checked in by lindanl, 4 years ago

new version using the kernels.

File size: 4.4 KB
Line 
1/*
2 *  Copyright (c) 2014 International Characters.
3 *  This software is licensed to the public under the Open Software License 3.0.
4 *  icgrep is a trademark of International Characters.
5 */
6
7#include "toolchain.h"
8#include "do_grep.h"
9
10#include <fstream>
11#include <sstream>
12#include <iostream>
13#include <string>
14#include <stdint.h>
15
16#include <stdio.h>
17#include <stdlib.h>
18#include <unistd.h>
19#include <errno.h>
20#include <sys/types.h>
21#include <sys/stat.h>
22#include <stdexcept>
23#include <cctype>
24
25
26#include <llvm/Support/raw_os_ostream.h>
27
28// mmap system
29#ifdef USE_BOOST_MMAP
30#include <boost/filesystem.hpp>
31#include <boost/iostreams/device/mapped_file.hpp>
32using namespace boost::iostreams;
33using namespace boost::filesystem;
34#else
35#include <sys/mman.h>
36#endif
37#include <fcntl.h>
38
39
40#define BUFFER_SEGMENTS 15
41#define BUFFER_SIZE (BUFFER_SEGMENTS * SEGMENT_SIZE)
42
//
// Write matched lines from a buffer to an output file, given segment
// scanners for line ends and matches (where matches are a subset of line ends).
// The buffer pointer must point to the first byte of the segment
// corresponding to the scanner indexes.  The first_line_start is the
// start position of the first line relative to the buffer start position.
// It must be zero or negative; if negative, the buffer must permit negative
// indexing so that the line up to the buffer start position can also be printed.
// The start position of the final line in the processed segment is returned.
//
53
54
55bool GrepExecutor::finalLineIsUnterminated() const {
56    if (mFileSize == 0) return false;
57    unsigned char end_byte = static_cast<unsigned char>(mFileBuffer[mFileSize-1]);
58    // LF through CR are line break characters
59    if ((end_byte >= 0xA) && (end_byte <= 0xD)) return false;
60    // Other line breaks require at least two bytes.
61    if (mFileSize == 1) return true;
62    // NEL
63    unsigned char penult_byte = static_cast<unsigned char>(mFileBuffer[mFileSize-2]);
64    if ((end_byte == 0x85) && (penult_byte == 0xC2)) return false;
65    if (mFileSize == 2) return true;
66    // LS and PS
67    if ((end_byte < 0xA8) || (end_byte > 0xA9)) return true;
68    return (static_cast<unsigned char>(mFileBuffer[mFileSize-3]) != 0xE2) || (penult_byte != 0x80);
69}
70
71void GrepExecutor::doGrep(const std::string & fileName) {
72
73    mFileName = fileName;
74
75#ifdef USE_BOOST_MMAP
76    const path file(mFileName);
77    if (exists(file)) {
78        if (is_directory(file)) {
79            return;
80        }
81    } else {
82        std::cerr << "Error: cannot open " << mFileName << " for processing. Skipped.\n";
83        return;
84    }
85
86    mFileSize = file_size(file);
87    mapped_file mFile;
88    if (mFileSize == 0) {
89        mFileBuffer = nullptr;
90    }
91    else {
92        try {
93            mFile.open(mFileName, mapped_file::priv, mFileSize, 0);
94        } catch (std::ios_base::failure e) {
95            std::cerr << "Error: Boost mmap of " << mFileName << ": " << e.what() << std::endl;
96            return;
97        }
98        mFileBuffer = mFile.data();
99    }
100#else
101    struct stat infile_sb;
102    const int fdSrc = open(mFileName.c_str(), O_RDONLY);
103    if (fdSrc == -1) {
104        std::cerr << "Error: cannot open " << mFileName << " for processing. Skipped.\n";
105        return;
106    }
107    if (fstat(fdSrc, &infile_sb) == -1) {
108        std::cerr << "Error: cannot stat " << mFileName << " for processing. Skipped.\n";
109        close (fdSrc);
110        return;
111    }
112    if (S_ISDIR(infile_sb.st_mode)) {
113        close (fdSrc);
114        return;
115    }
116    mFileSize = infile_sb.st_size;
117    if (mFileSize == 0) {
118        mFileBuffer = nullptr;
119    }
120    else {
121        mFileBuffer = (char *) mmap(NULL, mFileSize, PROT_READ, MAP_PRIVATE, fdSrc, 0);
122        if (mFileBuffer == MAP_FAILED) {
123            if (errno ==  ENOMEM) {
124                std::cerr << "Error:  mmap of " << mFileName << " failed: out of memory\n";
125                close (fdSrc);
126            }
127            else {
128                std::cerr << "Error: mmap of " << mFileName << " failed with errno " << errno << ". Skipped.\n";
129                close (fdSrc);
130            }
131            return;
132        }
133    }
134#endif
135
136    llvm::raw_os_ostream out(std::cout);
137
138    uint64_t finalLineUnterminated = 0;
139    if(finalLineIsUnterminated())
140        finalLineUnterminated = 1;
141
142    mMainFcn(mFileBuffer, mFileSize, fileName.c_str(), finalLineUnterminated);
143
144    PrintTotalCount();
145   
146#ifdef USE_BOOST_MMAP
147    mFile.close();
148#else
149    munmap((void *)mFileBuffer, mFileSize);
150    close(fdSrc);
151#endif   
152}
Note: See TracBrowser for help on using the repository browser.