Changeset 4802


Ignore:
Timestamp:
Sep 27, 2015, 3:16:44 PM (2 years ago)
Author:
cameron
Message:

getCodepointSetOption

Location:
icGREP/icgrep-devel/icgrep
Files:
2 edited

Legend:

Unmodified
Added
Removed
  • icGREP/icgrep-devel/icgrep/do_grep.cpp

    r4800 r4802  
    2121#include <sys/stat.h>
    2222#include <stdexcept>
     23#include <cctype>
    2324
    2425#include "include/simd-lib/carryQ.hpp"
     
    138139}
    139140
     141// Extracting codepoint data from UCD name data file.
     142ssize_t GrepExecutor::extract_codepoints(char * buffer, ssize_t first_line_start) {
     143   
     144    ssize_t line_start = first_line_start;
     145    size_t match_pos;
     146    size_t line_end;
     147   
     148    while (mMatch_scanner.has_next()) {
     149        match_pos = mMatch_scanner.scan_to_next();
     150        // If we found a match, it must be at a line end.
     151        line_end = mLineBreak_scanner.scan_to_next();
     152        while (line_end < match_pos) {
     153            line_start = line_end + 1;
     154            mLineNum++;
     155            line_end = mLineBreak_scanner.scan_to_next();
     156        }
     157       
     158        re::codepoint_t c = 0;
     159        ssize_t line_pos = line_start;
     160        while (isxdigit(buffer[line_pos])) {
     161            if (isdigit(buffer[line_pos])) {
     162                c = (c << 4) | (buffer[line_pos] - '0');
     163            }
     164            else {
     165                c = (c << 4) | (tolower(buffer[line_pos]) - 'a' + 10);
     166            }
     167        }
     168        assert(((line_pos - line_start) >= 4) && ((line_pos - line_start) <= 6)); // UCD format 4 to 6 hex digits.
     169        mParsedCodePointSet->insert(c);
     170       
     171        line_start = line_end + 1;
     172        mLineNum++;
     173    }
     174    while(mLineBreak_scanner.has_next()) {
     175        line_end = mLineBreak_scanner.scan_to_next();
     176        line_start = line_end+1;
     177        mLineNum++;
     178    }
     179    return line_start;
     180   
     181}
     182
     183
    140184void GrepExecutor::doGrep(const std::string & fileName) {
    141185
     
    229273            }
    230274        }
    231 
    232275        if (!mCountOnlyOption) {
    233             line_start = write_matches(out, mFileBuffer + (segment * SEGMENT_SIZE), line_start);
     276            if (mGetCodePointsOption) {
     277                line_start = extract_codepoints(mFileBuffer + (segment * SEGMENT_SIZE), line_start);
     278            }
     279            else {
     280                line_start = write_matches(out, mFileBuffer + (segment * SEGMENT_SIZE), line_start);
     281            }
    234282        }
    235283        segment++;
     
    314362            mMatch_scanner.load_block(simd<1>::constant<0>(), blk);
    315363        }
    316         line_start = write_matches(out, mFileBuffer + (segment * SEGMENT_SIZE), line_start);
     364        if (mGetCodePointsOption) {
     365            line_start = extract_codepoints(mFileBuffer + (segment * SEGMENT_SIZE), line_start);
     366        }
     367        else {
     368            line_start = write_matches(out, mFileBuffer + (segment * SEGMENT_SIZE), line_start);
     369        }
    317370    }
    318371#ifdef USE_BOOST_MMAP
  • icGREP/icgrep-devel/icgrep/do_grep.h

    r4788 r4802  
    1313#include "include/simd-lib/transpose.hpp"
    1414#include "include/simd-lib/bitblock_iterator.hpp"
     15#include <re/re_cc.h>
    1516
    1617struct Output {
     
    4748    GrepExecutor(void * process_block)
    4849    : mCountOnlyOption(false)
     50    , mGetCodePointsOption(false)
    4951    , mShowFileNameOption(false)
    5052    , mShowLineNumberingOption(false)
     53    , mParsedCodePointSet(nullptr)
    5154    , mProcessBlockFcn(reinterpret_cast<process_block_fcn>(process_block)) {
    5255
     
    5457         
    5558    void setCountOnlyOption(bool doCount = true) {mCountOnlyOption = doCount;}
     59    void setGetCodePointsOption(bool doCodepoints = true) {
     60        mGetCodePointsOption = doCodepoints;
     61        mParsedCodePointSet = re::makeCC();
     62    }
    5663    void setShowFileNameOption(bool showF = true) {mShowFileNameOption = showF;}
    5764    void setShowLineNumberOption(bool showN = true) {mShowLineNumberingOption = showN;}
     
    5966   
    6067    void doGrep(const std::string & fileName);
     68    re::CC * getParsedCodepoints() { return mParsedCodePointSet;}
    6169private:
    6270    ssize_t write_matches(llvm::raw_ostream & out, const char *buffer, ssize_t first_line_start);
     71   
    6372    bool finalLineIsUnterminated() const;
     73    ssize_t extract_codepoints(char * buffer, ssize_t first_line_start);
    6474
    6575    bool mCountOnlyOption;
     76    bool mGetCodePointsOption;
    6677    bool mShowFileNameOption;
    6778    bool mShowLineNumberingOption;
    6879    bool mNormalizeLineBreaksOption;
     80   
     81    re::CC * mParsedCodePointSet;
    6982
    7083    process_block_fcn mProcessBlockFcn;
Note: See TracChangeset for help on using the changeset viewer.