Changeset 5758


Ignore:
Timestamp:
Dec 6, 2017, 8:52:32 PM (7 days ago)
Author:
cameron
Message:

Bug fix for scan match processing

Location:
icGREP/icgrep-devel/icgrep
Files:
2 edited

Legend:

Unmodified
Added
Removed
  • icGREP/icgrep-devel/icgrep/grep_engine.cpp

    r5757 r5758  
    220220    friend class EmitMatchesEngine;
    221221public:
    222     EmitMatch(std::string linePrefix, std::stringstream * strm) : mLinePrefix(linePrefix), mLineCount(0), mPrevious_line_end(nullptr), mResultStr(strm) {}
     222    EmitMatch(std::string linePrefix, std::stringstream * strm) : mLinePrefix(linePrefix), mLineCount(0), mTerminated(true), mResultStr(strm) {}
    223223    void accumulate_match(const size_t lineNum, char * line_start, char * line_end) override;
    224224    void finalize_match(char * buffer_end) override;
     
    226226    std::string mLinePrefix;
    227227    size_t mLineCount;
    228     char * mPrevious_line_end;
     228    bool mTerminated;
    229229    std::stringstream* mResultStr;
    230230};
     
    234234//  input.  However, if the final line is not terminated, a new line is appended.
    235235//
    236 //  It is possible that the line_end position is past the EOF, if there is an unterminated
    237 //  final line.   To avoid potential bus errors, we only emit bytes up to but not
    238 //  including the line_end position when we first find a match.
    239236void EmitMatch::accumulate_match (const size_t lineNum, char * line_start, char * line_end) {
    240     if (!(WithFilenameFlag | LineNumberFlag) && (line_start == mPrevious_line_end + 1)) {
    241         // Consecutive matches: only one write call needed.
    242         mResultStr->write(mPrevious_line_end, line_end - mPrevious_line_end);
    243     }
    244     else {
    245         if (mLineCount > 0) {
    246             // Deal with the terminator of the previous line.  It could be an LF, the
    247             // last byte of NEL, LS or PS, or a two byte CRLF sequence.
    248             if (LLVM_UNLIKELY(mPrevious_line_end[0] == 0x0D)) {
    249                 mResultStr->write(mPrevious_line_end, mPrevious_line_end[1] == 0x0A ? 2 : 1);
    250             }
    251             else {
    252                 mResultStr->write(mPrevious_line_end, 1);
    253             }
    254         }
    255         if (WithFilenameFlag) {
    256             *mResultStr << mLinePrefix;
    257         }
    258         if (LineNumberFlag) {
    259             // Internally line numbers are counted from 0.  For display, adjust
    260             // the line number so that lines are numbered from 1.
    261             if (InitialTabFlag) {
    262                 *mResultStr << lineNum+1 << "\t:";
    263             }
    264             else {
    265                 *mResultStr << lineNum+1 << ":";
    266             }
    267         }
    268         mResultStr->write(line_start, line_end - line_start);
    269     }
    270     mPrevious_line_end = line_end;
     237    if (WithFilenameFlag) {
     238        *mResultStr << mLinePrefix;
     239    }
     240    if (LineNumberFlag) {
     241        // Internally line numbers are counted from 0.  For display, adjust
     242        // the line number so that lines are numbered from 1.
     243        if (InitialTabFlag) {
     244            *mResultStr << lineNum+1 << "\t:";
     245        }
     246        else {
     247            *mResultStr << lineNum+1 << ":";
     248        }
     249    }
     250    size_t bytes = line_end - line_start + 1;
     251    mResultStr->write(line_start, bytes);
    271252    mLineCount++;
     253    unsigned last_byte = *line_end;
     254    mTerminated = (last_byte >= 0x0A) && (last_byte <= 0x0D);
     255    if (LLVM_UNLIKELY(!mTerminated)) {
     256        if (last_byte == 0x85) {  //  Possible NEL terminator.
     257            mTerminated = (bytes >= 2) && (static_cast<unsigned>(line_end[-1]) == 0xC2);
     258        }
     259        else {
     260            // Possible LS or PS terminators.
     261            mTerminated = (bytes >= 3) && (static_cast<unsigned>(line_end[-2]) == 0xE2)
     262                                       && (static_cast<unsigned>(line_end[-1]) == 0x80)
     263                                       && ((last_byte == 0xA8) || (last_byte == 0xA9));
     264        }
     265    }
    272266}
    273267
    274268void EmitMatch::finalize_match(char * buffer_end) {
    275     if (mLineCount == 0) return;  // No matches.
    276     if (mPrevious_line_end < buffer_end) {
    277         if (LLVM_UNLIKELY(mPrevious_line_end[0] == 0x0D)) {
    278             mResultStr->write(mPrevious_line_end, mPrevious_line_end[1] == 0x0A ? 2 : 1);
    279         }
    280         else {
    281             mResultStr->write(mPrevious_line_end, 1);
    282         }
    283     }
    284     else {
    285         // Likely unterminated final line.
    286         char last_byte = mPrevious_line_end[-1];
    287         if (last_byte == 0x0D) {
    288             // The final CR is acceptable as a line_end.
    289             return;
    290         }
    291         // Terminate the line with an LF
    292         // (Even if we had an incomplete UTF-8 sequence.)
    293         *mResultStr << "\n";
    294     }
     269    if (!mTerminated) *mResultStr << "\n";
    295270}
    296271
  • icGREP/icgrep-devel/icgrep/kernels/scanmatchgen.cpp

    r5757 r5758  
    135135            matchRecordStart->addIncoming(priorRecordStart, prior_breaks_block);
    136136            phiRecordStart->addIncoming(matchRecordStart, loop_final_block);
    137             Value * const matchRecordEnd = iBuilder->CreateAdd(phiScanwordPos, iBuilder->CreateCountForwardZeroes(phiMatchWord, true));
     137            Value * matchRecordEnd = iBuilder->CreateAdd(phiScanwordPos, iBuilder->CreateCountForwardZeroes(phiMatchWord, true));
     138            // It is possible that the matchRecordEnd position is one past EOF.  Make sure not
     139            // to access past EOF.
     140            Value * bufLimit = iBuilder->CreateSub(iBuilder->getBufferedSize("InputStream"), ConstantInt::get(sizeTy, 1));
     141            matchRecordEnd = iBuilder->CreateSelect(iBuilder->CreateICmpULT(matchRecordEnd, bufLimit), matchRecordEnd, bufLimit);
    138142            Function * const dispatcher = m->getFunction("accumulate_match_wrapper"); assert (dispatcher);
    139143            Value * const startPtr = iBuilder->getRawInputPointer("InputStream", matchRecordStart);
Note: See TracChangeset for help on using the changeset viewer.