Changeset 4477


Ignore:
Timestamp: Feb 7, 2015, 1:46:57 PM (4 years ago)
Author: cameron
Message: Bug fix for big files
Location: icGREP/icgrep-devel/icgrep
Files: 2 edited

Legend:

Unmodified
Added
Removed
  • icGREP/icgrep-devel/icgrep/do_grep.cpp

    r4460 r4477  
    7272    }
    7373    if (mShowFileNameOption) {
    74       std::cout << currentFileName;
     74      std::cout << mFileName;
    7575    }
    7676    if (mShowLineNumberingOption) {
     
    100100    }
    101101    else {
    102         // TODO:  Adjust for CRLF
     102        // Check for line_end on first byte of CRLF;  note that to safely
     103        // access past line_end, even at the end of buffer, we require the
     104        // mmap_sentinel_bytes >= 1.
     105        if (end_byte == 0x0D) {
     106            if (buffer[line_end + 1] == 0x0A) {
     107                line_end++;
     108            }
     109        }
    103110        std::cout.write(&buffer[line_start], line_end - line_start + 1);
    104111    }
    105 
    106112    line_start = line_end + 1;
    107113    line_no++;
     
    127133   
    128134   
    129     currentFileName = infilename + ":";
    130    
    131     int match_count=0;
    132     int blk = 0;
    133     int block_base  = 0;
    134     int block_pos   = 0;
    135     int chars_avail = 0;
    136     int line_start = 0;
     135    mFileName = infilename + ":";
     136   
     137    size_t match_count = 0;
     138    size_t blk = 0;
     139    size_t block_base  = 0;
     140    size_t block_pos   = 0;
     141    size_t chars_avail = 0;
     142    ssize_t line_start = 0;
    137143    line_no = 1;
    138144
     
    158164        return;
    159165    }
    160     infile_buffer = (char *) mmap(NULL, infile_sb.st_size, PROT_READ, MAP_PRIVATE, fdSrc, 0);
     166    mFileSize = infile_sb.st_size;
     167    // Set 2 sentinel bytes, 1 for possible addition of LF for unterminated last line,
     168    // 1 guard byte.
     169    const size_t mmap_sentinel_bytes = 2; 
     170    infile_buffer = (char *) mmap(NULL, mFileSize + mmap_sentinel_bytes, PROT_READ, MAP_PRIVATE, fdSrc, 0);
    161171    if (infile_buffer == MAP_FAILED) {
    162172        std::cerr << "Error: mmap of " << infilename << " failed. Skipped.\n";
    163173        return;
    164174    }
    165     currentFileSize = infile_sb.st_size;
    166175    char * buffer_ptr;
    167     int segment = 0;
    168     int segment_base = 0;
    169     chars_avail = infile_sb.st_size;
     176    size_t segment = 0;
     177    size_t segment_base = 0;
     178    chars_avail = mFileSize;
    170179   
    171180//////////////////////////////////////////////////////////////////////////////////////////
     
    175184    while (chars_avail >= SEGMENT_SIZE) {
    176185
    177         segment_base = segment * SEGMENT_SIZE;
     186        segment_base = segment * SEGMENT_SIZE;
    178187        LF_scanner.init();
    179188        match_scanner.init();
     
    206215          line_start = write_matches(buffer_ptr, line_start);
    207216        }
    208         segment++;
    209         line_start -= SEGMENT_SIZE;  /* Will be negative offset for use within next segment. */
    210         chars_avail -= SEGMENT_SIZE;
     217        segment++;
     218        line_start -= SEGMENT_SIZE;  /* Will be negative offset for use within next segment. */
     219        chars_avail -= SEGMENT_SIZE;
    211220    }
    212221
     
    217226    segment_base = segment * SEGMENT_SIZE;
    218227    int remaining = chars_avail;
     228       
    219229
    220230    LF_scanner.init();
     
    224234    blk = 0;
    225235    while (remaining >= BLOCK_SIZE) {
    226     //fprintf(outfile, "Remaining = %i\n", remaining);
    227236        block_base = block_pos + segment_base;
    228237        s2p_do_block((BytePack *) &infile_buffer[block_base], basis_bits);
     
    255264
    256265    //For the last partial block, or for any carry.
     266   
     267   
    257268    EOF_mask = bitblock::srl(simd<1>::constant<1>(), convert(BLOCK_SIZE-remaining));
    258269    block_base = block_pos + segment_base;
     
    268279        }
    269280        if (mShowFileNameOption) {
    270             std::cout << currentFileName;
     281            std::cout << mFileName;
    271282        }
    272283        std::cout << match_count << std::endl;
     
    286297    }
    287298   
    288     munmap((void *) infile_buffer, infile_sb.st_size);
     299    munmap((void *) infile_buffer, mFileSize + mmap_sentinel_bytes);
    289300    close(fdSrc);
    290301   
  • icGREP/icgrep-devel/icgrep/do_grep.h

    r4460 r4477  
    7171    process_block_fcn mProcessBlockFcn;
    7272   
    73     std::string currentFileName;
    74     size_t currentFileSize;
     73    std::string mFileName;
     74    size_t mFileSize;
    7575    ScannerT LF_scanner;
    7676    ScannerT match_scanner;
    77     long line_no;
     77    size_t line_no;
    7878};
    7979
Note: See TracChangeset for help on using the changeset viewer.