Changeset 4038


Ignore:
Timestamp:
Aug 17, 2014, 7:26:13 AM (5 years ago)
Author:
cameron
Message:

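mmap version: when USE_MMAP is defined, the input file is opened with open()/fstat() and mapped with mmap(), replacing the buffered fread() path.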

File:
1 edited

Legend:

Unmodified
Added
Removed
  • icGREP/icgrep-devel/icgrep/icgrep.cpp

    r3991 r4038  
    1616#include <stdint.h>
    1717
    18 #define assert_0_error(errkind, errstrm)
    19 
    20 // XMLWF application headers and definitions
    2118#include <stdio.h>
    2219#include <stdlib.h>
     
    3330#include <simd-lib/bitblock_iterator.hpp>
    3431
     32// mmap system
     33#include <sys/mman.h>
     34#include <fcntl.h>
     35
     36
    3537#define SEGMENT_BLOCKS 15
    3638#define SEGMENT_SIZE (BLOCK_SIZE * SEGMENT_BLOCKS)
     
    6163typedef void (*process_block_fcn)(const Basis_bits &basis_bits, BitBlock carry_q[], Output &output);
    6264
     65
     66#define USE_MMAP
     67#ifndef USE_MMAP
    6368void do_process(FILE *infile, FILE *outfile, int count_only_option, int carry_count, process_block_fcn process_block);
     69#endif
     70#ifdef USE_MMAP
     71void do_process(char * infile_buffer, size_t infile_size, FILE *outfile, int count_only_option, int carry_count, process_block_fcn process_block);
     72#endif
     73
    6474BitBlock get_category(Basis_bits &basis_bits, const char* category);
    6575
     
    6979    FILE *infile, *outfile, *regexfile;
    7080
     81#ifdef USE_MMAP
     82    int fdSrc;
     83    struct stat infile_sb;
     84    char * infile_buffer;
     85#endif
     86   
    7187    int opt_code;
    7288    int count_only_option = 0;
     
    149165
    150166    infilename = argv[optind++];
     167#ifndef USE_MMAP
    151168    infile = fopen(infilename, "rb");
    152169    if (!infile) {
     
    154171        exit(-1);
    155172    }
     173#endif
     174#ifdef USE_MMAP
     175    fdSrc = open(infilename, O_RDONLY);
     176    if (fdSrc == -1) {
     177        fprintf(stderr, "Error: cannot open %s for processing.\n", infilename);
     178        exit(-1);
     179    }
     180    if (fstat(fdSrc, &infile_sb) == -1) {
     181        fprintf(stderr, "Error: cannot stat %s for processing.\n", infilename);
     182        exit(-1);
     183    }
     184    if (infile_sb.st_size == 0) {
     185        if (count_only_option) fprintf(outfile, "Matching Lines%d\n", 0);
     186        exit(0);
     187    }
     188    infile_buffer = (char *) mmap(NULL, infile_sb.st_size, PROT_READ, MAP_PRIVATE, fdSrc, 0);
     189    if (infile_buffer == MAP_FAILED) {
     190        fprintf(stderr, "Error: mmap of %s failure.\n", infilename);
     191        exit(-1);
     192    }
     193#endif
    156194
    157195    if (optind >= argc) outfile = stdout;
     
    175213    if (print_version_option)
    176214    {
    177         fprintf(outfile, "Parabix icgrep implementation: April 2014\n");
     215        fprintf(outfile, "Parabix icgrep implementation: August 2014\n");
    178216    }
    179217
     
    207245    {
    208246        void (*FP)(const Basis_bits &basis_bits, BitBlock carry_q[], Output &output) = (void (*)(const Basis_bits &basis_bits, BitBlock carry_q[], Output &output))(void*)llvm_codegen.process_block_fptr;
     247#ifndef USE_MMAP
    209248        do_process(infile, outfile, count_only_option, llvm_codegen.carry_q_size, FP);
     249#endif
     250#ifdef USE_MMAP
     251        do_process(infile_buffer, infile_sb.st_size, outfile, count_only_option, llvm_codegen.carry_q_size, FP);
     252#endif
    210253    }
    211254
    212255    delete re_compiler;
     256#ifndef USE_MMAP
    213257    fclose(infile);
     258#endif
     259#ifdef USE_MMAP
     260    close(fdSrc);
     261#endif
    214262    fclose(outfile);
    215263    if (regex_from_file_option) free(fileregex);
     
    218266}
    219267
     268#ifndef USE_MMAP
    220269void do_process(FILE *infile, FILE *outfile, int count_only_option, int carry_count, process_block_fcn process_block) {
     270#endif
     271#ifdef USE_MMAP
     272void do_process(char * infile_buffer, size_t infile_size, FILE *outfile, int count_only_option, int carry_count, process_block_fcn process_block) {
     273#endif
    221274
    222275    struct Basis_bits basis_bits;
     
    240293    int line_no = 0;
    241294
    242     int counter = 0;
    243 
    244295    BitStreamScanner<BitBlock, uint64_t, uint64_t, SEGMENT_BLOCKS> LF_scanner;
    245296    BitStreamScanner<BitBlock, uint64_t, uint64_t, SEGMENT_BLOCKS> match_scanner;
     297       
     298   
     299#ifndef USE_MMAP
    246300    ATTRIBUTE_SIMD_ALIGN char src_buffer[SEGMENT_SIZE];
    247 
     301   
    248302    chars_read = fread((void *)&src_buffer[0], 1, SEGMENT_SIZE, infile);
    249303    chars_avail = chars_read;
    250304    if (chars_avail >= SEGMENT_SIZE) chars_avail = SEGMENT_SIZE;
    251 
     305#endif
     306#ifdef USE_MMAP   
     307    int segment = 0;
     308    int segment_base = 0;
     309    chars_avail = infile_size;
     310   
     311#endif
    252312//////////////////////////////////////////////////////////////////////////////////////////
    253313// Full Segments
    254314//////////////////////////////////////////////////////////////////////////////////////////
    255315
     316   
     317   
    256318    while (chars_avail >= SEGMENT_SIZE) {
    257319
     320#ifdef USE_MMAP
     321        segment_base = segment * SEGMENT_SIZE;
     322#endif
    258323        LF_scanner.init();
    259324        match_scanner.init();
    260325
    261         counter++;
    262 
    263326        for (blk = 0; blk < SEGMENT_BLOCKS; blk++) {
     327#ifndef USE_MMAP
    264328            block_base = blk*BLOCK_SIZE;
    265329            s2p_do_block((BytePack *) &src_buffer[block_base], basis_bits);
     330#endif
     331#ifdef USE_MMAP
     332            block_base = blk*BLOCK_SIZE + segment_base;
     333            s2p_do_block((BytePack *) &infile_buffer[block_base], basis_bits);   
     334#endif
    266335            process_block(basis_bits, carry_q, output);
    267336
     
    283352        }
    284353
     354#ifndef USE_MMAP
    285355        int copy_back_pos = 0;
    286356
     
    295365
    296366        int  copy_back_size = SEGMENT_SIZE - copy_back_pos;
     367#endif
     368#ifdef USE_MMAP
     369   
     370#endif
    297371
    298372        if (!count_only_option) {
     373#ifndef USE_MMAP
    299374            line_start = 0;
     375#endif
    300376
    301377            while (match_scanner.has_next()) {
     
    307383                    line_end = LF_scanner.scan_to_next();
    308384                }
     385#ifndef USE_MMAP
    309386                fwrite(&src_buffer[line_start], 1, line_end - line_start + 1, outfile);
     387#endif
     388#ifdef USE_MMAP
     389                fwrite(&infile_buffer[segment_base + line_start], 1, line_end - line_start + 1, outfile);
     390 
     391#endif
     392
    310393                line_start = line_end+1;
    311394                line_no++;
     
    317400
    318401        }
    319 
     402#ifndef USE_MMAP
    320403        memmove(&src_buffer[0], &src_buffer[copy_back_pos], copy_back_size);
    321404
    322         //Do another read.
     405        //Do another read.
    323406        chars_read = fread(&src_buffer[copy_back_size], 1, copy_back_pos, infile);
    324407        chars_avail = chars_read + copy_back_size;
    325408        if (chars_avail >= SEGMENT_SIZE) chars_avail = SEGMENT_SIZE;
    326409        buffer_pos += chars_avail;
     410#endif
     411#ifdef USE_MMAP
     412        segment++;
     413        line_start -= SEGMENT_SIZE;  /* Will be negative offset for use within next segment. */
     414        chars_avail -= SEGMENT_SIZE;
     415   
     416#endif
    327417    }
    328418
     
    332422//////////////////////////////////////////////////////////////////////////////////////////
    333423
    334     block_pos = 0;
     424#ifdef USE_MMAP
     425    segment_base = segment * SEGMENT_SIZE;
     426#endif
    335427    int remaining = chars_avail;
    336428
     
    341433    blk = 0;
    342434    while (remaining >= BLOCK_SIZE) {
     435    //fprintf(outfile, "Remaining = %i\n", remaining);
     436#ifndef USE_MMAP
    343437        block_base = block_pos;
    344         s2p_do_block((BytePack *) &src_buffer[block_pos], basis_bits);
     438        s2p_do_block((BytePack *) &src_buffer[block_base], basis_bits);
     439#endif
     440#ifdef USE_MMAP
     441        block_base = block_pos + segment_base;
     442        s2p_do_block((BytePack *) &infile_buffer[block_base], basis_bits);   
     443#endif
    345444        process_block(basis_bits, carry_q, output);
    346445
     
    368467    }
    369468    block_base = block_pos;
     469    //fprintf(stderr, "Remaining = %i\n", remaining);
    370470
    371471    //For the last partial block, or for any carry.
    372472    EOF_mask = bitblock::srl(simd<1>::constant<1>(), convert(BLOCK_SIZE-remaining));
    373     s2p_do_final_block((BytePack *) &src_buffer[block_pos], basis_bits, EOF_mask);
     473#ifndef USE_MMAP
     474     block_base = block_pos;
     475     s2p_do_final_block((BytePack *) &src_buffer[block_base], basis_bits, EOF_mask);
     476#endif
     477#ifdef USE_MMAP
     478     block_base = block_pos + segment_base;
     479     s2p_do_final_block((BytePack *) &infile_buffer[block_base], basis_bits, EOF_mask);   
     480#endif
    374481    process_block(basis_bits, carry_q, output);
    375482
     
    393500            match_scanner.load_block(simd<1>::constant<0>(), i);
    394501        }
     502#ifndef USE_MMAP
    395503        line_start = 0;
     504#endif
    396505        while (match_scanner.has_next())
    397506        {
     
    404513                line_end = LF_scanner.scan_to_next();
    405514            }
     515#ifndef USE_MMAP
    406516            fwrite(&src_buffer[line_start], 1, line_end - line_start + 1, outfile);
     517#endif
     518#ifdef USE_MMAP
     519            fwrite(&infile_buffer[segment_base + line_start], 1, line_end - line_start + 1, outfile);
     520 
     521#endif
    407522            line_start = line_end + 1;
    408523            line_no++;
Note: See TracChangeset for help on using the changeset viewer.