Ignore:
Timestamp:
Sep 23, 2015, 12:54:17 AM (4 years ago)
Author:
nmedfort
Message:

Miscellaneous changes, plus a potential fix for the SIGBUS issue reported by Hongpu.

File:
1 edited

Legend:

Unmodified
Added
Removed
  • icGREP/icgrep-devel/icgrep/do_grep.cpp

    r4778 r4788  
    2727#include "include/simd-lib/buffer.hpp"
    2828
     29#include <llvm/Support/raw_os_ostream.h>
     30
    2931// mmap system
    3032#ifdef USE_BOOST_MMAP
     33#include <boost/filesystem.hpp>
    3134#include <boost/iostreams/device/mapped_file.hpp>
     35using namespace boost::iostreams;
     36using namespace boost::filesystem;
    3237#else
    3338#include <sys/mman.h>
     
    3843#define BUFFER_SEGMENTS 15
    3944#define BUFFER_SIZE (BUFFER_SEGMENTS * SEGMENT_SIZE)
    40 
    41 BitBlock EOF_mask = simd<1>::constant<1>();
    4245
    4346//
     
    5255//
    5356
    54 ssize_t GrepExecutor::write_matches(char * buffer, ssize_t first_line_start) {
    55 
    56   ssize_t line_start = first_line_start;
    57   ssize_t match_pos;
    58   ssize_t line_end;
    59   while (mMatch_scanner.has_next()) {
    60     match_pos = mMatch_scanner.scan_to_next();
    61     // If we found a match, it must be at a line end.
    62     line_end = mLineBreak_scanner.scan_to_next();
    63     while (line_end < match_pos) {
    64       line_start = line_end + 1;
    65       line_no++;
    66       line_end = mLineBreak_scanner.scan_to_next();
    67     }
    68     if (mShowFileNameOption) {
    69       std::cout << mFileName;
    70     }
    71     if (mShowLineNumberingOption) {
    72       std::cout << line_no << ":";
    73     }
    74     if ((buffer[line_start] == 0xA) && (line_start != line_end)) {
    75         // The LF of a CRLF.  Really the end of the last line. 
    76         line_start++;
    77     }
    78     unsigned char end_byte = (unsigned char) buffer[line_end];
    79     if (mNormalizeLineBreaksOption) {
    80       if (end_byte == 0x85) {
    81           // Line terminated with NEL, on the second byte.  Back up 1.
    82           line_end--;
    83       }
    84       else if (end_byte > 0xD) {
    85           // Line terminated with PS or LS, on the third byte.  Back up 2.
    86           line_end -= 2;
    87       }
    88       std::cout.write(&buffer[line_start], line_end - line_start);
    89       std::cout << std::endl;
    90     }
    91     else {
    92       if (end_byte == 0x0) {
    93           // This must be a sentinel byte position at the end of file.
    94           // Do not write it.
    95           line_end--;
    96       }
    97       else if (end_byte == 0x0D) {
    98           // Check for line_end on first byte of CRLF;  note that to safely
    99           // access past line_end, even at the end of buffer, we require the
    100           // mmap_sentinel_bytes >= 1.
    101           if (buffer[line_end + 1] == 0x0A) {
    102               // Found CRLF; preserve both bytes.
    103               line_end++;
    104           }
    105       }
    106       std::cout.write(&buffer[line_start], line_end - line_start + 1);
    107     }
    108     line_start = line_end + 1;
    109     line_no++;
    110   }
    111   while(mLineBreak_scanner.has_next()) {
    112     line_end = mLineBreak_scanner.scan_to_next();
    113     line_start = line_end+1;
    114     line_no++;
    115   }
    116   return line_start;
     57ssize_t GrepExecutor::write_matches(llvm::raw_ostream & out, const char * buffer, ssize_t line_start) {
     58
     59    ssize_t match_pos;
     60    ssize_t line_end;
     61    while (mMatch_scanner.has_next()) {
     62        match_pos = mMatch_scanner.scan_to_next();
     63        // If we found a match, it must be at a line end.
     64        while (true) {
     65            line_end = mLineBreak_scanner.scan_to_next();
     66            if (line_end >= match_pos) {
     67                break;
     68            }
     69            line_start = line_end + 1;
     70            mLineNum++;
     71        }
     72        assert (buffer + line_end < mFileBuffer + mFileSize);
     73        if (mShowFileNameOption) {
     74            out << mFileName << ':';
     75        }
     76        if (mShowLineNumberingOption) {
     77            out << mLineNum << ":";
     78        }
     79        if ((buffer[line_start] == 0xA) && (line_start != line_end)) {
     80            // The LF of a CRLF.  Really the end of the last line.
     81            line_start++;
     82        }
     83        unsigned char end_byte = (unsigned char)buffer[line_end];
     84        if (mNormalizeLineBreaksOption) {
     85            if (end_byte == 0x85) {
     86                // Line terminated with NEL, on the second byte.  Back up 1.
     87                line_end--;
     88            } else if (end_byte > 0xD) {
     89                // Line terminated with PS or LS, on the third byte.  Back up 2.
     90                line_end -= 2;
     91            }
     92            out.write(&buffer[line_start], line_end - line_start);
     93            out << '\n';
     94        }
     95        else {
     96            if (end_byte == 0x0) {
     97                // This must be a sentinel byte position at the end of file.
     98                // Do not write it.
     99                line_end--;
     100            } else if (end_byte == 0x0D) {
     101                // Check for line_end on first byte of CRLF;  note that to safely
     102                // access past line_end, even at the end of buffer, we require the
     103                // MMAP_SENTINEL_BYTES >= 1.
     104                if (buffer[line_end + 1] == 0x0A) {
     105                    // Found CRLF; preserve both bytes.
     106                    line_end++;
     107                }
     108            }
     109            out.write(&buffer[line_start], line_end - line_start + 1);
     110        }
     111        line_start = line_end + 1;
     112        mLineNum++;
     113    }
     114    while(mLineBreak_scanner.has_next()) {
     115        line_end = mLineBreak_scanner.scan_to_next();
     116        line_start = line_end+1;
     117        mLineNum++;
     118    }
     119    return line_start;
    117120}
    118121
    119 bool GrepExecutor::finalLineIsUnterminated() {
     122bool GrepExecutor::finalLineIsUnterminated() const {
    120123    if (mFileSize == 0) return false;
    121124    unsigned char end_byte = static_cast<unsigned char>(mFileBuffer[mFileSize-1]);
     
    124127    // Other line breaks require at least two bytes.
    125128    if (mFileSize == 1) return true;
    126     // NEL 
     129    // NEL
    127130    unsigned char penult_byte = static_cast<unsigned char>(mFileBuffer[mFileSize-2]);
    128131    if ((end_byte == 0x85) && (penult_byte == 0xC2)) return false;
     
    133136}
    134137
    135 void GrepExecutor::doGrep(const std::string infilename) {
     138void GrepExecutor::doGrep(const std::string & fileName) {
    136139
    137140    Basis_bits basis_bits;
    138     BitBlock match_vector;
    139    
    140     mFileName = infilename + ":";
    141    
     141    BitBlock match_vector = simd<1>::constant<0>();
    142142    size_t match_count = 0;
    143     size_t blk = 0;
    144     size_t block_base  = 0;
    145     size_t block_pos   = 0;
    146143    size_t chars_avail = 0;
    147144    ssize_t line_start = 0;
    148     line_no = 1;
    149 
    150     match_vector = simd<1>::constant<0>();
    151     int fdSrc;
    152     struct stat infile_sb;
    153     fdSrc = open(infilename.c_str(), O_RDONLY);
    154     if (fdSrc == -1) {
    155         std::cerr << "Error: cannot open " << infilename << " for processing. Skipped.\n";
    156         return;
    157     }
    158     if (fstat(fdSrc, &infile_sb) == -1) {
    159         std::cerr << "Error: cannot stat " << infilename << " for processing. Skipped.\n";
    160         return;
    161     }
    162     if (S_ISDIR(infile_sb.st_mode)) {
    163         // Silently ignore directories.
    164         // std::cerr << "Error: " << infilename << " is a directory. Skipped.\n";
    165         return;
    166     }
    167     mFileSize = infile_sb.st_size;
    168     // Set 2 sentinel bytes, 1 for possible addition of LF for unterminated last line,
    169     // 1 guard byte.  PROT_WRITE enables writing the sentinel.
    170     const size_t mmap_sentinel_bytes = 2; 
     145
     146    mFileName = fileName;
     147    mLineNum = 1;
     148
    171149#ifdef USE_BOOST_MMAP
    172     boost::iostreams::mapped_file mFile;
     150    const path file(mFileName);
     151    if (exists(file)) {
     152        if (is_directory(file)) {
     153            return;
     154        }
     155    } else {
     156        std::cerr << "Error: cannot open " << mFileName << " for processing. Skipped.\n";
     157        return;
     158    }
     159
     160    mFileSize = file_size(file);
     161    mapped_file mFile;
    173162    try {
    174         mFile.open(
    175             infilename,
    176             boost::iostreams::mapped_file_base::mapmode::priv,
    177             mFileSize + mmap_sentinel_bytes, 0
    178         );
     163        mFile.open(mFileName, mapped_file::priv, mFileSize, 0);
    179164    } catch (std::ios_base::failure e) {
    180165        std::cerr << "Error: Boost mmap " << e.what() << std::endl;
     
    183168    mFileBuffer = mFile.data();
    184169#else
    185     mFileBuffer = (char *) mmap(NULL, mFileSize + mmap_sentinel_bytes, PROT_READ|PROT_WRITE, MAP_PRIVATE, fdSrc, 0);
     170    struct stat infile_sb;
     171    const int fdSrc = open(mFileName.c_str(), O_RDONLY);
     172    if (fdSrc == -1) {
     173        std::cerr << "Error: cannot open " << mFileName << " for processing. Skipped.\n";
     174        return;
     175    }
     176    if (fstat(fdSrc, &infile_sb) == -1) {
     177        std::cerr << "Error: cannot stat " << mFileName << " for processing. Skipped.\n";
     178        close (fdSrc);
     179        return;
     180    }
     181    if (S_ISDIR(infile_sb.st_mode)) {
     182        close (fdSrc);
     183        return;
     184    }
     185    mFileSize = infile_sb.st_size;
     186    mFileBuffer = (char *) mmap(NULL, mFileSize, PROT_READ, MAP_PRIVATE, fdSrc, 0);
    186187    if (mFileBuffer == MAP_FAILED) {
    187188        if (errno ==  ENOMEM) {
    188             std::cerr << "Error:  mmap of " << infilename << " failed: out of memory\n";
     189            std::cerr << "Error:  mmap of " << mFileName << " failed: out of memory\n";
    189190        }
    190191        else {
    191             std::cerr << "Error: mmap of " << infilename << " failed with errno " << errno << ". Skipped.\n";
     192            std::cerr << "Error: mmap of " << mFileName << " failed with errno " << errno << ". Skipped.\n";
    192193        }
    193194        return;
    194195    }
    195196#endif
    196     char * buffer_ptr;
    197197    size_t segment = 0;
    198     size_t segment_base = 0;
    199198    chars_avail = mFileSize;
    200    
    201 //////////////////////////////////////////////////////////////////////////////////////////
    202 // Full Segments
    203 //////////////////////////////////////////////////////////////////////////////////////////
     199
     200    llvm::raw_os_ostream out(std::cout);
     201    //////////////////////////////////////////////////////////////////////////////////////////
     202    // Full Segments
     203    //////////////////////////////////////////////////////////////////////////////////////////
    204204
    205205    while (chars_avail >= SEGMENT_SIZE) {
    206206
    207         segment_base = segment * SEGMENT_SIZE;
    208207        mLineBreak_scanner.init();
    209208        mMatch_scanner.init();
    210209
    211         for (blk = 0; blk < SEGMENT_BLOCKS; blk++) {
    212             block_base = blk*BLOCK_SIZE + segment_base;
    213             s2p_do_block((BytePack *) &mFileBuffer[block_base], basis_bits);
     210        for (size_t blk = 0; blk != SEGMENT_BLOCKS; ++blk) {
     211            s2p_do_block(reinterpret_cast<BytePack *>(mFileBuffer + (blk * BLOCK_SIZE) + (segment * SEGMENT_SIZE)), basis_bits);
    214212            Output output;
    215213            mProcessBlockFcn(basis_bits, output);
     
    218216            mLineBreak_scanner.load_block(output.LF, blk);
    219217
    220             if (mCountOnlyOption){
    221                 if (bitblock::any(output.matches))
    222                 {
    223                     if (bitblock::any(simd_and(match_vector, output.matches))){
     218            if (mCountOnlyOption) {
     219                if (bitblock::any(output.matches)) {
     220                    if (bitblock::any(simd_and(match_vector, output.matches))) {
    224221                        match_count += bitblock::popcount(match_vector);
    225222                        match_vector = output.matches;
    226                     }
    227                     else
    228                     {
     223                    } else {
    229224                        match_vector = simd_or(match_vector, output.matches);
    230225                    }
     
    233228        }
    234229
    235         buffer_ptr = &mFileBuffer[segment_base];
    236 
    237230        if (!mCountOnlyOption) {
    238           line_start = write_matches(buffer_ptr, line_start);
     231            line_start = write_matches(out, mFileBuffer + (segment * SEGMENT_SIZE), line_start);
    239232        }
    240233        segment++;
     
    243236    }
    244237
    245 //////////////////////////////////////////////////////////////////////////////////////////
    246 // For the Final Partial Segment.
    247 //////////////////////////////////////////////////////////////////////////////////////////
    248 
    249     segment_base = segment * SEGMENT_SIZE;
    250     int remaining = chars_avail;
    251        
     238    //////////////////////////////////////////////////////////////////////////////////////////
     239    // For the Final Partial Segment.
     240    //////////////////////////////////////////////////////////////////////////////////////////
     241
     242    size_t remaining = chars_avail;
     243    size_t blk = 0;
    252244
    253245    mLineBreak_scanner.init();
     
    255247
    256248    /* Full Blocks */
    257     blk = 0;
    258     while (remaining >= BLOCK_SIZE) {
    259         block_base = block_pos + segment_base;
    260         s2p_do_block((BytePack *) &mFileBuffer[block_base], basis_bits);
     249    for (; remaining >= BLOCK_SIZE; remaining -= BLOCK_SIZE, ++blk) {
     250        s2p_do_block(reinterpret_cast<BytePack *>(mFileBuffer + (blk * BLOCK_SIZE) + (segment * SEGMENT_SIZE)), basis_bits);
    261251        Output output;
    262252        mProcessBlockFcn(basis_bits, output);
     
    264254        mLineBreak_scanner.load_block(output.LF, blk);
    265255        mMatch_scanner.load_block(output.matches, blk);
    266         if (mCountOnlyOption)
    267         {
    268             if (bitblock::any(output.matches))
    269             {
    270                 if (bitblock::any(simd_and(match_vector, output.matches)))
    271                 {
     256        if (mCountOnlyOption) {
     257            if (bitblock::any(output.matches)) {
     258                if (bitblock::any(simd_and(match_vector, output.matches))) {
    272259                    match_count += bitblock::popcount(match_vector);
    273260                    match_vector = output.matches;
    274                 }
    275                 else
    276                 {
     261                } else {
    277262                    match_vector = simd_or(match_vector, output.matches);
    278263                }
    279264            }
    280265        }
    281 
    282         block_pos += BLOCK_SIZE;
    283         remaining -= BLOCK_SIZE;
    284         blk++;
    285     }
    286     block_base = block_pos;
     266    }
    287267
    288268    //Final Partial Block (may be empty, but there could be carries pending).
    289269   
    290     EOF_mask = bitblock::srl(simd<1>::constant<1>(), convert(BLOCK_SIZE-remaining));
     270    const auto EOF_mask = bitblock::srl(simd<1>::constant<1>(), convert(BLOCK_SIZE - remaining));
    291271   
    292     block_base = block_pos + segment_base;
    293     s2p_do_final_block((BytePack *) &mFileBuffer[block_base], basis_bits, EOF_mask);
     272    s2p_do_final_block(reinterpret_cast<BytePack *>(mFileBuffer + (blk * BLOCK_SIZE) + (segment * SEGMENT_SIZE)), basis_bits, EOF_mask);
    294273
    295274    if (finalLineIsUnterminated()) {
     
    299278        basis_bits.bit_4 = simd_or(basis_bits.bit_4, EOF_pos);
    300279        basis_bits.bit_6 = simd_or(basis_bits.bit_6, EOF_pos);
    301         // Add final sentinel byte so write_matches knows what to do.
    302         mFileBuffer[mFileSize] = 0x0;
    303280    }
    304281   
     
    306283    mProcessBlockFcn(basis_bits, output);
    307284
    308     if (mCountOnlyOption)
    309     {
     285    if (mCountOnlyOption) {
    310286        match_count += bitblock::popcount(match_vector);
    311         if (bitblock::any(output.matches))
    312         {
     287        if (bitblock::any(output.matches)) {
    313288            match_count += bitblock::popcount(output.matches);
    314289        }
    315290        if (mShowFileNameOption) {
    316             std::cout << mFileName;
    317         }
    318         std::cout << match_count << std::endl;
    319     }
    320     else
    321     {
     291            out << mFileName << ':';
     292        }
     293        out << match_count << '\n';
     294    } else {
    322295        mLineBreak_scanner.load_block(output.LF, blk);
    323296        mMatch_scanner.load_block(output.matches, blk);
    324         blk++;
    325         for (int i = blk; i < SEGMENT_BLOCKS; i++)
    326         {
    327             mLineBreak_scanner.load_block(simd<1>::constant<0>(), i);
    328             mMatch_scanner.load_block(simd<1>::constant<0>(), i);
    329         }
    330         buffer_ptr = &mFileBuffer[segment_base];
    331         line_start = write_matches(buffer_ptr, line_start);
    332     }
    333    
     297        while (++blk < SEGMENT_BLOCKS) {
     298            mLineBreak_scanner.load_block(simd<1>::constant<0>(), blk);
     299            mMatch_scanner.load_block(simd<1>::constant<0>(), blk);
     300        }
     301        line_start = write_matches(out, mFileBuffer + (segment * SEGMENT_SIZE), line_start);
     302    }
    334303#ifdef USE_BOOST_MMAP
    335304    mFile.close();
    336305#else
    337     munmap((void *) mFileBuffer, mFileSize + mmap_sentinel_bytes);
    338 #endif
     306    munmap((void *)mFileBuffer, mFileSize);
    339307    close(fdSrc);
    340     
     308#endif   
    341309}
Note: See TracChangeset for help on using the changeset viewer.