Changeset 4259


Ignore:
Timestamp:
Oct 23, 2014, 5:13:24 AM (4 years ago)
Author:
cameron
Message:

Update RE prototype to use mmap

File:
1 edited

Legend:

Unmodified
Added
Removed
  • proto/RE/output/grep_template.cpp

    r3578 r4259  
    3636#endif
    3737
     38// mmap system
     39#include <sys/mman.h>
     40#include <fcntl.h>
     41
     42#if (BLOCK_SIZE == 128)
     43#define SEGMENT_BLOCKS 7
     44#endif
     45
     46#if (BLOCK_SIZE == 256)
    3847#define SEGMENT_BLOCKS 15
     48#endif
     49
    3950#define SEGMENT_SIZE (BLOCK_SIZE * SEGMENT_BLOCKS)
    4051
     
    4556#include <simd-lib/transpose.hpp>
    4657
     58#define USE_MMAP
     59
     60#ifndef USE_MMAP
    4761static void do_process(FILE *infile, FILE *outfile, int count_only_option);
     62#endif
     63#ifdef USE_MMAP
     64static void do_process(char * infile_buffer, size_t infile_size, FILE *outfile, int count_only_option);
     65#endif
    4866
    4967int main(int argc, char * argv[]) {
     
    5169        char * infilename, * outfilename;
    5270        FILE *infile, *outfile;
     71#ifdef USE_MMAP
     72    int fdSrc;
     73    struct stat infile_sb;
     74    char * infile_buffer;
     75#endif
     76
    5377
    5478        int opt_code;
     
    80104
    81105        infilename = argv[optind++];
     106#ifndef USE_MMAP
    82107        infile = fopen(infilename, "rb");
    83108        if (!infile) {
     
    85110                exit(-1);
    86111        }
     112#endif
    87113
    88114        if (optind >= argc) outfile = stdout;
     
    103129        }
    104130
     131#ifdef USE_MMAP
     132    fdSrc = open(infilename, O_RDONLY);
     133    if (fdSrc == -1) {
     134        fprintf(stderr, "Error: cannot open %s for processing.\n", infilename);
     135        exit(-1);
     136    }
     137    if (fstat(fdSrc, &infile_sb) == -1) {
     138        fprintf(stderr, "Error: cannot stat %s for processing.\n", infilename);
     139        exit(-1);
     140    }
     141    if (infile_sb.st_size == 0) {
     142        if (count_only_option) fprintf(outfile, "Matching Lines: %d\n", 0);
     143        exit(0);
     144    }
     145    infile_buffer = (char *) mmap(NULL, infile_sb.st_size, PROT_READ, MAP_PRIVATE, fdSrc, 0);
     146    if (infile_buffer == MAP_FAILED) {
     147        fprintf(stderr, "Error: mmap of %s failure.\n", infilename);
     148        exit(-1);
     149    }
     150#endif
    105151
    106152        if (print_version_option) {
     
    112158        PERF_SEC_INIT(parser_timer);
    113159
     160#ifndef USE_MMAP
    114161        do_process(infile, outfile, count_only_option);
     162#endif
     163#ifdef USE_MMAP
     164        do_process(infile_buffer, infile_sb.st_size, outfile, count_only_option);
     165#endif
    115166
    116167        PERF_SEC_DUMP(parser_timer);
     
    118169        PERF_SEC_DESTROY(parser_timer);
    119170
    120         fclose(infile);
     171#ifndef USE_MMAP
     172    fclose(infile);
     173#endif
     174#ifdef USE_MMAP
     175    munmap((void *) infile_buffer, infile_sb.st_size);
     176    close(fdSrc);
     177#endif
    121178        fclose(outfile);
    122179
     
    124181}
    125182
     183#if (BLOCK_SIZE == 256)
     184typedef BitStreamScanner<BitBlock, uint64_t, uint64_t, SEGMENT_BLOCKS> ScannerT; // scanner type used by write_matches and do_process; 64-bit integer type arguments for 256-bit blocks
     185#endif
     186
     187#if (BLOCK_SIZE == 128)
     188typedef BitStreamScanner<BitBlock, uint32_t, uint32_t, SEGMENT_BLOCKS> ScannerT; // same alias with 32-bit integer type arguments for 128-bit blocks; no alias is defined here for other BLOCK_SIZE values
     189#endif
     190
     191//
     192// Write matched lines from a buffer to an output file, given segment
     193// scanners for line ends and matches (where matches are a subset of line ends).
     194// The buffer pointer must point to the first byte of the segment
     195// corresponding to the scanner indexes.   The first_line_start is the
     196// start position of the first line relative to the buffer start position.
     197// It must be zero or negative;  if negative, the buffer must permit negative
     198// indexing so that the line up to the buffer start position can also be printed.
     199// The start position of the final line in the processed segment is returned.
     200//
     201
     202ssize_t write_matches(FILE * outfile, ScannerT line_scanner, ScannerT match_scanner, char * buffer, ssize_t first_line_start) { // both scanners are received by value
     203       
     204        ssize_t line_start = first_line_start; // offset (relative to buffer) of the line currently being scanned; may start out negative
     205        size_t match_pos;
     206        size_t line_end;
     207        while (match_scanner.has_next()) { // one iteration per remaining match position
     208                match_pos = match_scanner.scan_to_next();
     209                // If we found a match, it must be at a line end.
     210                line_end = line_scanner.scan_to_next();
     211                while (line_end < match_pos) { // step over line ends before the match; those lines are not written
     212                        line_start = line_end + 1;
     213                        line_end = line_scanner.scan_to_next();
     214                }
     215                fwrite(&buffer[line_start], 1, line_end - line_start + 1, outfile); // writes bytes line_start..line_end inclusive; the fwrite result is not checked
     216                line_start = line_end + 1; // the next line begins right after this line end
     217               
     218        }
     219        while(line_scanner.has_next()) { // no matches left: consume the remaining line ends so line_start ends up past the last one
     220                line_end = line_scanner.scan_to_next();
     221                line_start = line_end+1;
     222  }
     223  return line_start; // position just past the last line end seen (or first_line_start if none), relative to buffer
     224}
     225
     226
     227
     228#ifndef USE_MMAP
    126229void do_process(FILE *infile, FILE *outfile, int count_only_option) {
     230#endif
     231#ifdef USE_MMAP
     232        void do_process(char * infile_buffer, size_t infile_size, FILE *outfile, int count_only_option){
     233#endif
    127234
    128235        @decl
     
    139246        int chars_avail = 0;
    140247        int chars_read  = 0;
    141 
    142         int line_start, line_end, match_pos, line_no;
    143         line_no = 0;
    144 
    145         BitStreamScanner<BitBlock, uint64_t, uint64_t, SEGMENT_BLOCKS> LF_scanner;
    146         BitStreamScanner<BitBlock, uint64_t, uint64_t, SEGMENT_BLOCKS> match_scanner;
    147         ATTRIBUTE_SIMD_ALIGN char src_buffer[SEGMENT_SIZE];     
    148        
    149         chars_read = fread((void *)&src_buffer[0], 1, SEGMENT_SIZE, infile);
    150         chars_avail = chars_read;
    151         if (chars_avail >= SEGMENT_SIZE) chars_avail = SEGMENT_SIZE;
    152 
     248                int line_start = 0;
     249                int line_end = 0;
     250                int match_pos = 0;
     251                int line_no = 0;
     252               
     253                ScannerT LF_scanner;
     254                ScannerT match_scanner;
     255
     256                char * buffer_ptr;
     257#ifndef USE_MMAP
     258                ATTRIBUTE_SIMD_ALIGN char src_buffer[SEGMENT_SIZE];
     259                buffer_ptr = &src_buffer;
     260                chars_read = fread((void *)&src_buffer[0], 1, SEGMENT_SIZE, infile);
     261                chars_avail = chars_read;
     262                if (chars_avail >= SEGMENT_SIZE) chars_avail = SEGMENT_SIZE;
     263#endif
     264#ifdef USE_MMAP
     265                int segment = 0;
     266                int segment_base = 0;
     267                chars_avail = infile_size;
     268
     269#endif
    153270
    154271        @stream_stmts
     
    159276       
    160277        while (chars_avail >= SEGMENT_SIZE) {
     278#ifdef USE_MMAP
     279                segment_base = segment * SEGMENT_SIZE;
     280#endif
    161281                LF_scanner.init();
    162282                match_scanner.init();
     
    164284                PERF_SEC_START(parser_timer);
    165285                for (blk = 0; blk < SEGMENT_BLOCKS; blk++) {
    166                         block_base = blk*BLOCK_SIZE;
    167                         s2p_do_block((BytePack *) &src_buffer[block_base], basis_bits);
     286#ifndef USE_MMAP
     287            block_base = blk*BLOCK_SIZE;
     288            s2p_do_block((BytePack *) &src_buffer[block_base], basis_bits);
     289#endif
     290#ifdef USE_MMAP
     291            block_base = blk*BLOCK_SIZE + segment_base;
     292            s2p_do_block((BytePack *) &infile_buffer[block_base], basis_bits);
     293#endif
    168294
    169295                        @block_stmts
     
    184310
    185311                }
     312#ifndef USE_MMAP
    186313 
    187314                demo.clear();
     
    191318
    192319                match_scanner.clear_from(copy_back_pos);
    193 
    194 
    195                 if (!count_only_option) {
    196                         line_start = 0;
    197 
    198                         while (match_scanner.has_next()) {
    199                                 match_pos = match_scanner.scan_to_next();       
    200                                 line_end = LF_scanner.scan_to_next();
    201                                 while (line_end < match_pos) {
    202                                         line_start = line_end+1;
    203                                         line_no++; 
    204                                         line_end = LF_scanner.scan_to_next();
    205                                 }
    206                                 fwrite(&src_buffer[line_start], 1, line_end - line_start + 1, outfile);
    207                                 line_start = line_end+1;
    208                                 line_no++; 
    209                         }
    210                         while (LF_scanner.has_next()) {
    211                                 line_end = LF_scanner.scan_to_next();
    212                                 line_no++;
    213                         }       
    214                 }
    215 
     320#endif
     321#ifdef USE_MMAP
     322                buffer_ptr = &infile_buffer[segment_base];
     323#endif
     324
     325
     326        if (!count_only_option) {
     327                        line_start = write_matches(outfile, LF_scanner, match_scanner, buffer_ptr, line_start);
     328        }
     329
     330#ifndef USE_MMAP
    216331                memmove(&src_buffer[0], &src_buffer[copy_back_pos], copy_back_size);
    217 
     332               
    218333                PERF_SEC_END(parser_timer, chars_avail);
    219 
     334               
    220335                chars_read = fread(&src_buffer[copy_back_size], 1, copy_back_pos, infile);
    221336                chars_avail = chars_read + copy_back_size;
     
    223338                buffer_pos += chars_avail;
    224339                buffer_base = buffer_pos;
     340#endif
     341#ifdef USE_MMAP
     342                segment++;
     343                line_start -= SEGMENT_SIZE;  /* Will be negative offset for use within next segment. */
     344                chars_avail -= SEGMENT_SIZE;
     345#endif
    225346        }
    226347
     
    231352        PERF_SEC_START(parser_timer);
    232353
    233         block_pos = 0;
     354#ifdef USE_MMAP
     355                segment_base = segment * SEGMENT_SIZE;
     356#endif
    234357        int remaining = chars_avail;
    235358
     
    241364        blk = 0;
    242365        while (remaining >= BLOCK_SIZE) {
    243                 block_base = block_pos;
    244                 s2p_do_block((BytePack *) &src_buffer[block_pos], basis_bits);
     366#ifndef USE_MMAP
     367        block_base = block_pos;
     368        s2p_do_block((BytePack *) &src_buffer[block_base], basis_bits);
     369#endif
     370#ifdef USE_MMAP
     371        block_base = block_pos + segment_base;
     372        s2p_do_block((BytePack *) &infile_buffer[block_base], basis_bits);
     373#endif
    245374                @block_stmts
    246375                LF_scanner.load_block(lex.LF, blk);
     
    266395//
    267396        EOF_mask = bitblock::srl(simd<1>::constant<1>(), convert(BLOCK_SIZE-remaining));
    268                                 s2p_do_final_block((BytePack *) &src_buffer[block_pos], basis_bits, EOF_mask);
     397#ifndef USE_MMAP
     398                block_base = block_pos;
     399                s2p_do_final_block((BytePack *) &src_buffer[block_base], basis_bits, EOF_mask);
     400#endif
     401#ifdef USE_MMAP
     402                block_base = block_pos + segment_base;
     403                s2p_do_final_block((BytePack *) &infile_buffer[block_base], basis_bits, EOF_mask);
     404#endif
    269405        @final_block_stmts
    270406
     
    285421                        match_scanner.load_block(simd<1>::constant<0>(), i);
    286422                }
    287                 line_start = 0;
    288 
    289                 while (match_scanner.has_next()) {
    290                         match_pos = match_scanner.scan_to_next();       
    291                         line_end = LF_scanner.scan_to_next();
    292                         while (line_end < match_pos) {
    293                                 line_start = line_end+1;
    294                                 line_no++; 
    295                                 line_end = LF_scanner.scan_to_next();
    296                         }
    297                         fwrite(&src_buffer[line_start], 1, line_end - line_start + 1, outfile);
    298                         line_start = line_end+1;
    299                         line_no++; 
    300                 }
    301                 while (LF_scanner.has_next()) {
    302                         line_end = LF_scanner.scan_to_next();
    303                         line_no++;     
    304                 }       
     423#ifndef USE_MMAP
     424        line_start = 0;
     425#endif
     426#ifdef USE_MMAP
     427        buffer_ptr = &infile_buffer[segment_base];
     428#endif
     429        line_start = write_matches(outfile, LF_scanner, match_scanner, buffer_ptr, line_start);
     430
    305431        }
    306432
    307433        buffer_pos += chars_avail;
    308         buffer_base = buffer_pos;
    309434
    310435        PERF_SEC_END(parser_timer, chars_avail);
Note: See TracChangeset for help on using the changeset viewer.