source: icGREP/icgrep-devel/icgrep/do_grep.cpp @ 4388

Last change on this file since 4388 was 4358, checked in by cameron, 4 years ago

Clean up various minor issues that generate compiler warnings.

File size: 8.0 KB
Line 
1/*
2 *  Copyright (c) 2014 International Characters.
3 *  This software is licensed to the public under the Open Software License 3.0.
4 *  icgrep is a trademark of International Characters.
5 */
6
7#include "icgrep.h"
8#include "do_grep.h"
9
10#include <fstream>
11#include <sstream>
12#include <iostream>
13#include <string>
14#include <stdint.h>
15
16#include <stdio.h>
17#include <stdlib.h>
18#include <unistd.h>
19#include <errno.h>
20#include <sys/types.h>
21#include <sys/stat.h>
22
23#include "include/simd-lib/carryQ.hpp"
24#include "include/simd-lib/pabloSupport.hpp"
25#include "include/simd-lib/s2p.hpp"
26#include "include/simd-lib/buffer.hpp"
27
28// mmap system
29#include <sys/mman.h>
30#include <fcntl.h>
31
32
33#define BUFFER_SEGMENTS 15
34#define BUFFER_SIZE (BUFFER_SEGMENTS * SEGMENT_SIZE)
35
36#define BitBlock_declare(name)  BitBlock name
37
38#define ubitblock_declare(name, n) \
39  ubitblock name[n];\
40  do {int i;\
41      for (i = 0; i < n; i++) name[i]._128 = simd<1>::constant<0>();\
42     }\
43  while (0)
44
45BitBlock EOF_mask = simd<1>::constant<1>();
46
47//
48// Write matched lines from a buffer to an output file, given segment
49// scanners for line ends and matches (where matches are a subset of line ends).
50// The buffer pointer must point to the first byte of the segment
51// corresponding to the scanner indexes.   The first_line_start is the
52// start position of the first line relative to the buffer start position.
53// It must be zero or negative;  if negative, the buffer must permit negative
54// indexing so that the lineup to the buffer start position can also be printed.
55// The start position of the final line in the processed segment is returned.
56//
57
58ssize_t GrepExecutor::write_matches(char * buffer, ssize_t first_line_start) {
59
60  ssize_t line_start = first_line_start;
61  size_t match_pos;
62  size_t line_end;
63  while (match_scanner.has_next()) {
64    match_pos = match_scanner.scan_to_next();
65    // If we found a match, it must be at a line end.
66    line_end = LF_scanner.scan_to_next();
67    while (line_end < match_pos) {
68      line_start = line_end + 1;
69      line_no++;
70      line_end = LF_scanner.scan_to_next();
71    }
72    if (mShowFileNameOption) {
73      std::cout << currentFileName;
74    }
75    if (mShowLineNumberingOption) {
76      std::cout << line_no << ":";
77    }
78    fwrite(&buffer[line_start], 1, line_end - line_start + 1, outfile);
79    line_start = line_end + 1;
80    line_no++;
81
82  }
83  while(LF_scanner.has_next()) {
84    line_end = LF_scanner.scan_to_next();
85    line_start = line_end+1;
86    line_no++;
87  }
88  return line_start;
89}
90
91
92
93void GrepExecutor::doGrep(const std::string infilename) {
94
95    struct Basis_bits basis_bits;
96    struct Output output;
97    BitBlock match_vector;
98    BitBlock carry_q[mCarries];
99    BitBlock advance_q[mAdvances];
100   
101   
102    currentFileName = infilename + ":";
103   
104    int match_count=0;
105    int blk = 0;
106    int block_base  = 0;
107    int block_pos   = 0;
108    int chars_avail = 0;
109    int line_start = 0;
110    line_no = 1;
111
112    match_vector = simd<1>::constant<0>();
113    memset (carry_q, 0, sizeof(BitBlock) * mCarries);
114    memset (advance_q, 0, sizeof(BitBlock) * mAdvances);
115   
116    int fdSrc;
117    struct stat infile_sb;
118    char * infile_buffer;
119    fdSrc = open(infilename.c_str(), O_RDONLY);
120    if (fdSrc == -1) {
121        std::cerr << "Error: cannot open " << infilename << " for processing.\n";
122        exit(-1);
123    }
124    if (fstat(fdSrc, &infile_sb) == -1) {
125        std::cerr << "Error: cannot stat " << infilename << " for processing.\n";
126        exit(-1);
127    }
128    if (infile_sb.st_size == 0) {
129        if (mShowFileNameOption) {
130            std::cout << currentFileName;
131        }
132        if (mCountOnlyOption) fprintf(outfile, "%d\n", 0);
133        exit(0);
134    }
135    infile_buffer = (char *) mmap(NULL, infile_sb.st_size, PROT_READ, MAP_PRIVATE, fdSrc, 0);
136    if (infile_buffer == MAP_FAILED) {
137        std::cerr << "Error: mmap of " << infilename << "failed.\n";
138        exit(-1);
139    }
140   
141   
142    char * buffer_ptr;
143    int segment = 0;
144    int segment_base = 0;
145    chars_avail = infile_sb.st_size;
146   
147//////////////////////////////////////////////////////////////////////////////////////////
148// Full Segments
149//////////////////////////////////////////////////////////////////////////////////////////
150
151    while (chars_avail >= SEGMENT_SIZE) {
152
153        segment_base = segment * SEGMENT_SIZE;
154        LF_scanner.init();
155        match_scanner.init();
156
157        for (blk = 0; blk < SEGMENT_BLOCKS; blk++) {
158            block_base = blk*BLOCK_SIZE + segment_base;
159            s2p_do_block((BytePack *) &infile_buffer[block_base], basis_bits);
160            mProcessBlockFcn(basis_bits, carry_q, advance_q, output);
161
162            LF_scanner.load_block(output.LF, blk);
163            match_scanner.load_block(output.matches, blk);
164            if (mCountOnlyOption){
165                if (bitblock::any(output.matches))
166                {
167                    if (bitblock::any(simd_and(match_vector, output.matches))){
168                        match_count += bitblock::popcount(match_vector);
169                        match_vector = output.matches;
170                    }
171                    else
172                    {
173                        match_vector = simd_or(match_vector, output.matches);
174                    }
175                }
176            }
177        }
178
179        buffer_ptr = &infile_buffer[segment_base];
180
181        if (!mCountOnlyOption) {
182          line_start = write_matches(buffer_ptr, line_start);
183        }
184        segment++;
185        line_start -= SEGMENT_SIZE;  /* Will be negative offset for use within next segment. */
186        chars_avail -= SEGMENT_SIZE;
187    }
188
189//////////////////////////////////////////////////////////////////////////////////////////
190// For the Final Partial Segment.
191//////////////////////////////////////////////////////////////////////////////////////////
192
193    segment_base = segment * SEGMENT_SIZE;
194    int remaining = chars_avail;
195
196    LF_scanner.init();
197    match_scanner.init();
198
199    /* Full Blocks */
200    blk = 0;
201    while (remaining >= BLOCK_SIZE) {
202    //fprintf(outfile, "Remaining = %i\n", remaining);
203        block_base = block_pos + segment_base;
204        s2p_do_block((BytePack *) &infile_buffer[block_base], basis_bits);
205        mProcessBlockFcn(basis_bits, carry_q, advance_q, output);
206
207        LF_scanner.load_block(output.LF, blk);
208        match_scanner.load_block(output.matches, blk);
209        if (mCountOnlyOption)
210        {
211            if (bitblock::any(output.matches))
212            {
213                if (bitblock::any(simd_and(match_vector, output.matches)))
214                {
215                    match_count += bitblock::popcount(match_vector);
216                    match_vector = output.matches;
217                }
218                else
219                {
220                    match_vector = simd_or(match_vector, output.matches);
221                }
222            }
223        }
224
225        block_pos += BLOCK_SIZE;
226        remaining -= BLOCK_SIZE;
227        blk++;
228    }
229    block_base = block_pos;
230    //fprintf(stderr, "Remaining = %i\n", remaining);
231
232    //For the last partial block, or for any carry.
233    EOF_mask = bitblock::srl(simd<1>::constant<1>(), convert(BLOCK_SIZE-remaining));
234    block_base = block_pos + segment_base;
235    s2p_do_final_block((BytePack *) &infile_buffer[block_base], basis_bits, EOF_mask);
236    mProcessBlockFcn(basis_bits, carry_q, advance_q, output);
237
238    if (mCountOnlyOption)
239    {
240        match_count += bitblock::popcount(match_vector);
241        if (bitblock::any(output.matches))
242        {
243            match_count += bitblock::popcount(output.matches);
244        }
245        if (mShowFileNameOption) {
246            std::cout << currentFileName;
247        }
248        fprintf(outfile, "%d\n", match_count);
249    }
250    else
251    {
252        LF_scanner.load_block(output.LF, blk);
253        match_scanner.load_block(output.matches, blk);
254        blk++;
255        for (int i = blk; i < SEGMENT_BLOCKS; i++)
256        {
257            LF_scanner.load_block(simd<1>::constant<0>(), i);
258            match_scanner.load_block(simd<1>::constant<0>(), i);
259        }
260        buffer_ptr = &infile_buffer[segment_base];
261        line_start = write_matches(buffer_ptr, line_start);
262    }
263   
264    munmap((void *) infile_buffer, infile_sb.st_size);
265    close(fdSrc);
266   
267}
Note: See TracBrowser for help on using the repository browser.