Changeset 5048


Ignore:
Timestamp:
Jun 11, 2016, 10:10:46 PM (3 years ago)
Author:
cameron
Message:

Clean out vestigial finalLineUnterminated logic in grep_engine

Location:
icGREP/icgrep-devel/icgrep
Files:
3 edited

Legend:

Unmodified
Added
Removed
  • icGREP/icgrep-devel/icgrep/grep_engine.cpp

    r5046 r5048  
    68 68 bool isUTF_16 = false;
    69 69
    70 bool GrepEngine::finalLineIsUnterminated(const char * const fileBuffer, const size_t fileSize, bool UTF_16) {
    71     if (fileSize == 0) return false;
    72     unsigned char end_byte = static_cast<unsigned char>(fileBuffer[fileSize-1]);
    73     // LF through CR are line break characters
    74     if ((end_byte >= 0xA) && (end_byte <= 0xD)) return false;
    75     // Other line breaks require at least two bytes.
    76     if (fileSize == 1) return true;
    77     // NEL
    78     unsigned char penult_byte = static_cast<unsigned char>(fileBuffer[fileSize-2]);
    79     if ((end_byte == 0x85) && (penult_byte == (UTF_16 ? 0x00 : 0xC2))) return false;
    80     if (fileSize == 2) return true;
    81     // LS and PS
    82     if ((end_byte < 0xA8) || (end_byte > 0xA9)) return true;
    83         if (!UTF_16) {
    84             return (static_cast<unsigned char>(fileBuffer[fileSize-3]) != 0xE2) || (penult_byte != 0x80);
    85         }
    86         else {// UTF_16
    87             return (penult_byte != 0x20);
    88         }
    89 }
    90 
    91 70 void GrepEngine::doGrep(const std::string & fileName, const int fileIdx, bool CountOnly, std::vector<uint64_t> & total_CountOnly, bool UTF_16) {
    92 71     path file(fileName);
     
    106 85             char * fileBuffer = const_cast<char *>(source.data());
    107 86             if (CountOnly) {
    108                 total_CountOnly[fileIdx] = mGrepFunction_CountOnly(fileBuffer, fileSize, fileIdx, finalLineIsUnterminated(fileBuffer, fileSize, UTF_16));
     87                total_CountOnly[fileIdx] = mGrepFunction_CountOnly(fileBuffer, fileSize, fileIdx);
    109 88             } else {
    110                 mGrepFunction(fileBuffer, fileSize, fileIdx, finalLineIsUnterminated(fileBuffer, fileSize, UTF_16));
     89                mGrepFunction(fileBuffer, fileSize, fileIdx);
    111 90             }
    112 91             source.close();
     
    115 94         }
    116 95     } else {
    117         if(CountOnly) {
    118             mGrepFunction_CountOnly(nullptr, 0, fileIdx, false);
     96        if (CountOnly) {
     97            mGrepFunction_CountOnly(nullptr, 0, fileIdx);
    119 98         } else {
    120             mGrepFunction(nullptr, 0, fileIdx, false);
     99            mGrepFunction(nullptr, 0, fileIdx);
    121 100         }
    122 101     }
     
    176 155     std::string mFileName = "Uname.txt";
    177 156
    178     uint64_t finalLineUnterminated = 0;
    179     if(finalLineIsUnterminated(mFileBuffer, mFileSize, isUTF_16))
    180     finalLineUnterminated = 1;   
    181     mGrepFunction(mFileBuffer, mFileSize, 0, finalLineUnterminated);
     157    mGrepFunction(mFileBuffer, mFileSize, 0);
    182 158
    183 159     return getParsedCodePointSet();
     
    209 185 extern "C" {
    210 186     void wrapped_report_match(uint64_t lineNum, uint64_t line_start, uint64_t line_end, const char * buffer, uint64_t filesize, int fileIdx) {
    211         int index = isUTF_16 ? 2 : 1;
    212         int idx = fileIdx;
    213      
     187        int index = isUTF_16 ? 2 : 1;
     188        int idx = fileIdx;
     189         
    214 190         if (ShowFileNames) {
    215 191             resultStrs[idx] << inputFiles[idx] << ':';
     
    236 212         }
    237 213         unsigned char end_byte = (unsigned char)buffer[line_end];
    238         unsigned char penult_byte = (unsigned char)(buffer[line_end - 1]);
     214        unsigned char penult_byte = (unsigned char)(buffer[line_end - 1]);
    239 215         if (NormalizeLineBreaks) {
    240 216             if (end_byte == 0x85) {
     
    248 224             resultStrs[idx] << '\n';
    249 225         }
    250         else{   
     226        else {   
    251 227             if ((!isUTF_16 && end_byte == 0x0D) || (isUTF_16 && (end_byte == 0x0D && penult_byte == 0x0))) {
    252 228                 // Check for line_end on first byte of CRLF;  note that we don't
    253 229                 // want to access past the end of buffer.
    254                 if (line_end + 1 < filesize) {
    255                     if (!isUTF_16 && buffer[line_end + 1] == 0x0A) {
    256                     // Found CRLF; preserve both bytes.
    257                         line_end++;;
    258                     }
    259                     if (isUTF_16 && buffer[line_end + 1] == 0x0 && buffer[line_end + 2] == 0x0A) {
    260                     // Found CRLF; preserve both bytes.
    261                         line_end += 2;
    262                     }
    263                 }
     230                if (line_end + 1 < filesize) {
     231                    if (!isUTF_16 && buffer[line_end + 1] == 0x0A) {
     232                        // Found CRLF; preserve both bytes.
     233                        line_end++;
     234                    }
     235                    if (isUTF_16 && buffer[line_end + 1] == 0x0 && buffer[line_end + 2] == 0x0A) {
     236                        // Found CRLF; preserve both bytes.
     237                        line_end += 2;
     238                    }
     239                }
    264 240             }
    265 241             resultStrs[idx].write(&buffer[line_start * index], (line_end - line_start + 1) * index);
  • icGREP/icgrep-devel/icgrep/grep_engine.h

    r5045 r5048  
    18 18
    19 19 class GrepEngine {
    20     typedef void (*GrepFunctionType)(char * byte_data, size_t filesize, const int fileIdx, uint64_t finalLineUnterminated);
    21     typedef uint64_t (*GrepFunctionType_CountOnly)(char * byte_data, size_t filesize, const int fileIdx, uint64_t finalLineUnterminated);
     20    typedef void (*GrepFunctionType)(char * byte_data, size_t filesize, const int fileIdx);
     21    typedef uint64_t (*GrepFunctionType_CountOnly)(char * byte_data, size_t filesize, const int fileIdx);
    22 22 public:
    23 23
     
    34 34 private:
    35 35
    36     static bool finalLineIsUnterminated(const char * const fileBuffer, const size_t fileSize, bool UTF_16);
    37 
    38 36     GrepFunctionType mGrepFunction;
    39 37     GrepFunctionType_CountOnly mGrepFunction_CountOnly;
  • icGREP/icgrep-devel/icgrep/kernels/pipeline.cpp

    r5046 r5048  
    78 78     Type * const inputType = PointerType::get(ArrayType::get(StructType::get(mMod->getContext(), std::vector<Type *>({ArrayType::get(mBitBlockType, (UTF_16 ? 16 : 8))})), 1), 0);
    79 79     Type * const resultTy = CountOnly ? int64ty : iBuilder->getVoidTy();
    80     Function * const main = cast<Function>(mMod->getOrInsertFunction("Main", resultTy, inputType, int64ty, int64ty, iBuilder->getInt1Ty(), nullptr));
     80    Function * const main = cast<Function>(mMod->getOrInsertFunction("Main", resultTy, inputType, int64ty, int64ty, nullptr));
    81 81     main->setCallingConv(CallingConv::C);
    82 82     Function::arg_iterator args = main->arg_begin();
     
    88 88     Value * const fileIdx = &*(args++);
    89 89     fileIdx->setName("fileIdx");
    90     Value * const finalLineUnterminated = &*(args++);
    91     finalLineUnterminated->setName("finalLineUnterminated");
    92 90
    93 91     iBuilder->SetInsertPoint(BasicBlock::Create(mMod->getContext(), "entry", main,0));
Note: See TracChangeset for help on using the changeset viewer.