Changeset 5965 for icGREP


Ignore:
Timestamp:
Apr 11, 2018, 11:30:29 AM (10 months ago)
Author:
cameron
Message:

Support for file/directory include/exclude

Location:
icGREP/icgrep-devel/icgrep
Files:
2 edited

Legend:

Unmodified
Added
Removed
  • icGREP/icgrep-devel/icgrep/grep/grep_engine.h

    r5964 r5965  
    183183   
    184184   
    185 #define MAX_SIMD_WIDTH_SUPPORTED 512
    186 #define INITIAL_CAPACITY 1024
     185#define MAX_SIMD_WIDTH_SUPPORTED 256
     186#define INITIAL_CAPACITY 64
    187187   
    188188class SearchableBuffer  {
  • icGREP/icgrep-devel/icgrep/util/file_select.cpp

    r5964 r5965  
    1414#include <re/parsers/parser.h>
    1515#include <re/re_alt.h>
     16#include <re/re_seq.h>
     17#include <re/re_start.h>
     18#include <re/re_end.h>
     19#include <re/re_cc.h>
    1620#include <re/re_toolchain.h>
     21#include <re/printer_re.h>
    1722#include <grep/grep_engine.h>
    1823#include <fstream>
     
    4449
    4550std::string ExcludeDirFlag;
    46 static cl::opt<std::string, true> ExcludeDirOption("exclude-dir", cl::location(ExcludeDirFlag), cl::desc("Exclude directories matching the given pattern."), cl::cat(Input_Options));
     51static cl::opt<std::string, true> ExcludeDirOption("exclude-dir", cl::location(ExcludeDirFlag), cl::desc("Exclude directories matching the given pattern."),
     52                                                   cl::init(".svn"), cl::cat(Input_Options));
     53
     54std::string IncludeDirFlag;
     55static cl::opt<std::string, true> IncludeDirOption("include-dir", cl::location(IncludeDirFlag), cl::desc("Include directories matching the given pattern."), cl::cat(Input_Options));
    4756
    4857std::string IncludeFlag;
     
    6473static cl::alias DirectoriesAlias("directories", cl::desc("Alias for -d"), cl::aliasopt(DirectoriesOption));
    6574
     75// Command line arguments to specify file and directory includes/excludes
     76// use GLOB syntax, matching any full pathname suffix after a "/", or
     77// the full filename of any recursively selected file or directory.
     78re::RE * anchorToFullFileName(re::RE * glob) {
     79    return re::makeSeq({re::makeAlt({re::makeStart(), re::makeCC('/')}), glob, re::makeEnd()});
     80}
     81
    6682bool UseStdIn;
     83
     84re::RE * getDirectoryExcludePattern() {
     85    if (ExcludeDirFlag != "") {
     86        auto excludeDir = re::RE_Parser::parse(ExcludeDirFlag, re::DEFAULT_MODE, re::RE_Syntax::FileGLOB);
     87        return anchorToFullFileName(excludeDir);
     88    } else {
     89        return re::makeAlt();  // matches nothing, so excludes nothing.
     90    }
     91}
     92
     93re::RE * getDirectoryIncludePattern() {
     94    if (IncludeDirFlag != "") {
     95        auto dir = re::RE_Parser::parse(IncludeDirFlag, re::DEFAULT_MODE, re::RE_Syntax::FileGLOB);
     96        return anchorToFullFileName(dir);
     97    } else {
     98        return re::makeEnd();  // matches every line..
     99    }
     100}
    67101
    68102re::RE * getFileExcludePattern() {
     
    72106        patterns.push_back(glob);
    73107    }
    74    
    75108    if (ExcludeFromFlag != "") {
    76109        std::ifstream globFile(ExcludeFromFlag.c_str());
     
    84117        }
    85118    }
    86     if (patterns.empty()) return nullptr;
    87     return re::makeAlt(patterns.begin(), patterns.end());
    88 }
    89 
    90 re::RE * getDirectoryExcludePattern() {
    91     if (ExcludeDirFlag != "") {
    92         return re::RE_Parser::parse(ExcludeDirFlag, re::DEFAULT_MODE, re::RE_Syntax::FileGLOB);
    93     }
    94     return nullptr;
    95 }
    96 
     119    if (patterns.empty()) return re::makeAlt();  // matches nothing, so excludes nothing.
     120    return anchorToFullFileName(re::makeAlt(patterns.begin(), patterns.end()));
     121}
     122   
    97123re::RE * getFileIncludePattern() {
    98124    if (IncludeFlag != "") {
    99         return re::RE_Parser::parse(IncludeFlag, re::DEFAULT_MODE, re::RE_Syntax::FileGLOB);
    100     }
    101     return nullptr;
    102 }
    103 
    104 // Include is the default unless a -include= option exists and is prior to any -exclude
    105 // or -exclude-dir option.
    106 bool includeIsDefault() {
    107     if (IncludeFlag == "") return true;
    108     if ((ExcludeFlag != "") && (ExcludeOption.getPosition() < IncludeOption.getPosition())) return true;
    109     if ((ExcludeDirFlag != "") && (ExcludeDirOption.getPosition() < IncludeOption.getPosition())) return true;
    110     return false;
    111 }
    112    
    113    
    114    
     125        re::RE * includeSpec = re::RE_Parser::parse(IncludeFlag, re::DEFAULT_MODE, re::RE_Syntax::FileGLOB);
     126        includeSpec = anchorToFullFileName(includeSpec);
     127        return includeSpec;
     128    } else {
     129        return re::makeEnd();  // matches every line.
     130    }
     131}
     132
    115133namespace fs = boost::filesystem;
    116134
    117 // This is a stub, to be expanded later.
    118 bool excludeDirectory(fs::path dirpath) { return dirpath.filename() == ".svn";}
    119 
    120 // Determine whether to skip a path based on -D skip or -d skip settings.
    121 bool skip_path(fs::path p) {
    122     switch (fs::status(p).type()) {
    123         case fs::directory_file: return DirectoriesFlag == Skip;
    124         case fs::block_file:
    125         case fs::character_file:
    126         case fs::fifo_file:
    127         case fs::socket_file:
    128             return DevicesFlag == Skip;
    129         default:
    130             return false;
    131     }
    132 }
    133 
    134     void getSubdirectoryFiles(fs::path dirpath, std::vector<fs::path> & collectedFiles) {
     135//
     136//  Directory List: a set of directory paths that have been
     137//  examined to identify candidate files for searching, together
     138//  with a count of the number of candidate files in each directory.
     139//
     140//  FileName Buffer: an ordered sequence of NUL terminated filenames
     141//  for each candidate produced in the directory traversal.
     142//  The first mFullPathEntries entries are CWD paths.  Subsequent entries
     143//  are base file names relative to a directory.   The set
     144//  of all entries for a given directory are consecutive in the
     145//  buffer, and the sets are ordered consecutively by directory
     146//  index in the Directory List.
     147//
     148//  CollectedPaths: a vector of file paths to which the
     149//  selected files are added.
     150
     151class FileSelectAccumulator : public grep::MatchAccumulator {
     152public:
     153    FileSelectAccumulator(std::vector<fs::path> & collectedPaths) :
     154        mCollectedPaths(collectedPaths),
     155        mFullPathEntries(0)
     156    {}
     157    void setFullPathEntries(unsigned entries) {mFullPathEntries = entries; mDirectoryIndex = 0;}
     158    void reset();
     159    void addDirectory(fs::path dirPath, unsigned cumulativeEntryCount);
     160    void accumulate_match(const size_t lineNum, char * line_start, char * line_end) override;
     161protected:
     162    std::vector<fs::path> & mCollectedPaths;
     163    unsigned mFullPathEntries;
     164    unsigned mDirectoryIndex;
     165    std::vector<fs::path> mDirectoryList;
     166    std::vector<unsigned> mCumulativeEntryCount;
     167};
     168   
     169void FileSelectAccumulator::reset() {
     170    mCollectedPaths.clear();
     171    mFullPathEntries = 0;
     172    mDirectoryIndex = 0;
     173    mDirectoryList.clear();
     174    mCumulativeEntryCount.clear();
     175}
     176
     177void FileSelectAccumulator::addDirectory(fs::path dirPath, unsigned cumulativeEntryCount) {
     178    mDirectoryList.push_back(dirPath);
     179    mCumulativeEntryCount.push_back(cumulativeEntryCount);
     180}
     181
     182void FileSelectAccumulator::accumulate_match(const size_t fileIdx, char * name_start, char * name_end) {
     183    fs::path p(std::string(name_start, name_end - name_start));
     184    if (fileIdx < mFullPathEntries) {
     185        mCollectedPaths.push_back(p);
     186   } else {
     187        assert(mDirectoryIndex < mDirectoryList.size());
     188        while (fileIdx >= mCumulativeEntryCount[mDirectoryIndex]) {
     189            mDirectoryIndex++;
     190        }
     191        mCollectedPaths.emplace_back(mDirectoryList[mDirectoryIndex]/std::string(name_start, name_end - name_start));
     192    }
     193}
     194   
     195std::vector<fs::path> getFullFileList(cl::list<std::string> & inputFiles) {
     196    // The vector to accumulate the full list of collected files to be searched.
     197    std::vector<fs::path> collectedPaths;
     198    FileSelectAccumulator fileAccum(collectedPaths);
     199
     200    // In this pass through command line arguments and the file hierarchy,
     201    // we are just gathering file and subdirectory entries, so we silently
     202    // ignore errors.  We use the boost::filesystem operations that set
     203    // error codes rather than raise exceptions.
    135204    boost::system::error_code errc;
    136     fs::directory_iterator di(dirpath, errc);
    137     fs::directory_iterator di_end;
    138     if (errc) {
    139         // If we cannot enter the directory, keep it in the list of files.
    140         collectedFiles.push_back(dirpath);
    141         return;
    142     }
    143     //FileAccumulator accum(dirpath, collectedFiles);
    144     while (di != di_end) {
    145         auto & e = di->path();
    146         if (fs::is_directory(e)) {
    147             if (fs::is_symlink(e) && !DereferenceRecursiveFlag) {
    148                 continue;
    149             }
    150             if (!excludeDirectory(e)) {
    151                 getSubdirectoryFiles(e, collectedFiles);
    152             }
    153         } else {
    154             if (!skip_path(e)) {
    155                 collectedFiles.push_back(e);
    156             }
    157         }
    158         di.increment(errc);
    159         if (errc) {
    160             collectedFiles.push_back(e);
    161         }
    162     }
    163 }
    164 
    165 std::vector<fs::path> getFullFileList(cl::list<std::string> & inputFiles) {
    166     std::vector<fs::path> expanded_paths;
    167     boost::system::error_code errc;
     205   
     206    // At each level we gather candidate file and directory names and then
     207    // filter the names based on -include, -exclude, -include-dir, -exclude-dir,
     208    // and -exclude-from settings.
     209    //
     210    grep::SearchableBuffer dirCandidates;
     211    grep::SearchableBuffer fileCandidates;
     212
     213    // First level of processing:  command line files and directories.
    168214    for (const std::string & f : inputFiles) {
    169         if (f == "-") {
     215        if (f == "-") {  // stdin, will always be searched.
    170216            argv::UseStdIn = true;
    171217            continue;
    172218        }
    173219        fs::path p(f);
    174         if (skip_path(p)) {
    175             continue;
     220        if (errc) {
     221            // If there was an error, we leave the file in the fileCandidates
     222            // list for later error processing.
     223            fileCandidates.addSearchCandidate(p.c_str());
     224        } else if (fs::is_directory(p)) {
     225            if (DirectoriesFlag == Recurse) {
     226                dirCandidates.addSearchCandidate(p.c_str());
     227            } else if (DirectoriesFlag == Read) {
     228                fileCandidates.addSearchCandidate(p.c_str());
     229            }
     230        } else if (fs::is_regular_file(p)) {
     231            fileCandidates.addSearchCandidate(p.c_str());
     232        } else {
     233            // Devices and unknown file types
     234            if (DevicesFlag == Read) {
     235                fileCandidates.addSearchCandidate(p.c_str());
     236            }
    176237        }
    177         if (LLVM_UNLIKELY((DirectoriesFlag == Recurse) && fs::is_directory(p))) {
    178             if (!excludeDirectory(p)) {
    179                 getSubdirectoryFiles(p, expanded_paths);
     238    }
     239   
     240    auto commandLineDirCandidates = dirCandidates.getCandidateCount();
     241    auto commandLineFileCandidates = fileCandidates.getCandidateCount();
     242    fileAccum.setFullPathEntries(commandLineFileCandidates);
     243    if (commandLineDirCandidates > 0) {
     244        // Recursive processing of directories has been requested and we have
     245        // candidate directories from the command line.
     246   
     247        // selectedDirectories will hold the results of directory
     248        // include/exclude filtering at each level of processing.
     249        std::vector<fs::path> selectedDirectories;
     250       
     251        FileSelectAccumulator directoryAccum(selectedDirectories);
     252        grep::InternalSearchEngine directorySelectEngine;
     253        directorySelectEngine.setRecordBreak(grep::GrepRecordBreakKind::Null);
     254        directorySelectEngine.grepCodeGen
     255            (getDirectoryIncludePattern(), getDirectoryExcludePattern(), & directoryAccum);
     256       
     257        // The initial grep search determines which of the command line directories to process.
     258        // Each of these candidates is a full path returned from command line argument processing.
     259        directoryAccum.setFullPathEntries(dirCandidates.getCandidateCount());
     260        directorySelectEngine.doGrep(dirCandidates.getBufferBase(), dirCandidates.getBufferSize());
     261
     262        while (!selectedDirectories.empty()) {
     263            // We now iterate through the full list of directories, gathering
     264            // entries from each.
     265            // (a) File entries are added into the global list of fileCandidates.
     266            // (b) Directory entries are added into a new list of candidates at each level.
     267
     268            grep::SearchableBuffer subdirCandidates;
     269            std::vector<fs::path> currentDirectories = selectedDirectories;
     270            directoryAccum.reset();
     271            // Iterate through all directories, collecting subdirectory and file candidates.
     272            for (auto & dirpath : currentDirectories) {
     273                boost::system::error_code errc;
     274                fs::directory_iterator di_end;
     275                fs::directory_iterator di(dirpath, errc);
     276                if (errc) {
     277                    // If we cannot enter the directory, keep it in the list of files,
     278                    // for possible error reporting.
     279                    fileCandidates.addSearchCandidate(dirpath.filename().c_str());
     280                    continue;
     281                }
     282                while (di != di_end) {
     283                    auto & e = di->path();
     284                    if (fs::is_directory(e)) {
     285                        if (fs::is_symlink(e) && !DereferenceRecursiveFlag) {
     286                            di.increment(errc);
     287                            continue;
     288                        }
     289                        subdirCandidates.addSearchCandidate(e.filename().c_str());
     290                    } else if (fs::is_regular_file(e)) {
     291                        fileCandidates.addSearchCandidate(e.filename().c_str());
     292                    } else {
     293                        // Devices and unknown file types
     294                        if (DevicesFlag == Read) {
     295                            fileCandidates.addSearchCandidate(e.filename().c_str());
     296                        }
     297                    }
     298                    di.increment(errc);
     299                    if (errc) break;
     300                }
     301                // For each directory, update counts for candidates generated at this level.
     302                //
     303                directoryAccum.addDirectory(dirpath, subdirCandidates.getCandidateCount());
     304                fileAccum.addDirectory(dirpath, fileCandidates.getCandidateCount());
    180305            }
    181         } else {
    182             expanded_paths.push_back(p);
    183         }
    184     }
    185     return expanded_paths;
    186 }
    187 
    188 }
     306            // Directory traversal at this level is complete.  Clear selectedDirectories,
     307            // so that it will accumulate only the selected entries from the gathered
     308            // buffer of subdirCandidates.
     309            selectedDirectories.clear();
     310            //
     311            //  Now do the search to produce the next level of selected subdirectories
     312            directorySelectEngine.doGrep(subdirCandidates.getBufferBase(), subdirCandidates.getBufferSize());
     313            // The search result has been written to selectedDirectories; continue while we
     314            // have new subdirectories.
     315        } while (!selectedDirectories.empty());
     316    }
     317    //  All directories have been processed and all the fileCandidates are in the SearchableBuffer.
     318    //  Now determine which of the candidates should be included or excluded from the search.
     319    //  The results will be accumulated in collectedPaths.
     320    grep::InternalSearchEngine fileSelectEngine;
     321    fileSelectEngine.setRecordBreak(grep::GrepRecordBreakKind::Null);
     322    fileSelectEngine.grepCodeGen
     323       (getFileIncludePattern(), getFileExcludePattern(), & fileAccum);
     324    fileSelectEngine.doGrep(fileCandidates.getBufferBase(), fileCandidates.getBufferSize());
     325    return collectedPaths;
     326}
     327
     328}
Note: See TracChangeset for help on using the changeset viewer.