Changeset 5965
- Timestamp:
- Apr 11, 2018, 11:30:29 AM (10 months ago)
- Location:
- icGREP/icgrep-devel/icgrep
- Files:
-
- 2 edited
Legend:
- Unmodified
- Added
- Removed
-
icGREP/icgrep-devel/icgrep/grep/grep_engine.h
r5964 r5965 183 183 184 184 185 #define MAX_SIMD_WIDTH_SUPPORTED 512186 #define INITIAL_CAPACITY 1024185 #define MAX_SIMD_WIDTH_SUPPORTED 256 186 #define INITIAL_CAPACITY 64 187 187 188 188 class SearchableBuffer { -
icGREP/icgrep-devel/icgrep/util/file_select.cpp
r5964 r5965 14 14 #include <re/parsers/parser.h> 15 15 #include <re/re_alt.h> 16 #include <re/re_seq.h> 17 #include <re/re_start.h> 18 #include <re/re_end.h> 19 #include <re/re_cc.h> 16 20 #include <re/re_toolchain.h> 21 #include <re/printer_re.h> 17 22 #include <grep/grep_engine.h> 18 23 #include <fstream> … … 44 49 45 50 std::string ExcludeDirFlag; 46 static cl::opt<std::string, true> ExcludeDirOption("exclude-dir", cl::location(ExcludeDirFlag), cl::desc("Exclude directories matching the given pattern."), cl::cat(Input_Options)); 51 static cl::opt<std::string, true> ExcludeDirOption("exclude-dir", cl::location(ExcludeDirFlag), cl::desc("Exclude directories matching the given pattern."), 52 cl::init(".svn"), cl::cat(Input_Options)); 53 54 std::string IncludeDirFlag; 55 static cl::opt<std::string, true> IncludeDirOption("include-dir", cl::location(IncludeDirFlag), cl::desc("Include directories matching the given pattern."), cl::cat(Input_Options)); 47 56 48 57 std::string IncludeFlag; … … 64 73 static cl::alias DirectoriesAlias("directories", cl::desc("Alias for -d"), cl::aliasopt(DirectoriesOption)); 65 74 75 // Command line arguments to specify file and directory includes/excludes 76 // use GLOB syntax, matching any full pathname suffix after a "/", or 77 // the full filename of any recursively selected file or directory. 78 re::RE * anchorToFullFileName(re::RE * glob) { 79 return re::makeSeq({re::makeAlt({re::makeStart(), re::makeCC('/')}), glob, re::makeEnd()}); 80 } 81 66 82 bool UseStdIn; 83 84 re::RE * getDirectoryExcludePattern() { 85 if (ExcludeDirFlag != "") { 86 auto excludeDir = re::RE_Parser::parse(ExcludeDirFlag, re::DEFAULT_MODE, re::RE_Syntax::FileGLOB); 87 return anchorToFullFileName(excludeDir); 88 } else { 89 return re::makeAlt(); // matches nothing, so excludes nothing. 
90 } 91 } 92 93 re::RE * getDirectoryIncludePattern() { 94 if (IncludeDirFlag != "") { 95 auto dir = re::RE_Parser::parse(IncludeDirFlag, re::DEFAULT_MODE, re::RE_Syntax::FileGLOB); 96 return anchorToFullFileName(dir); 97 } else { 98 return re::makeEnd(); // matches every line.. 99 } 100 } 67 101 68 102 re::RE * getFileExcludePattern() { … … 72 106 patterns.push_back(glob); 73 107 } 74 75 108 if (ExcludeFromFlag != "") { 76 109 std::ifstream globFile(ExcludeFromFlag.c_str()); … … 84 117 } 85 118 } 86 if (patterns.empty()) return nullptr; 87 return re::makeAlt(patterns.begin(), patterns.end()); 88 } 89 90 re::RE * getDirectoryExcludePattern() { 91 if (ExcludeDirFlag != "") { 92 return re::RE_Parser::parse(ExcludeDirFlag, re::DEFAULT_MODE, re::RE_Syntax::FileGLOB); 93 } 94 return nullptr; 95 } 96 119 if (patterns.empty()) return re::makeAlt(); // matches nothing, so excludes nothing. 120 return anchorToFullFileName(re::makeAlt(patterns.begin(), patterns.end())); 121 } 122 97 123 re::RE * getFileIncludePattern() { 98 124 if (IncludeFlag != "") { 99 return re::RE_Parser::parse(IncludeFlag, re::DEFAULT_MODE, re::RE_Syntax::FileGLOB); 100 } 101 return nullptr; 102 } 103 104 // Include is the default unless a -include= option exists and is prior to any -exclude 105 // or -exclude-dir option. 106 bool includeIsDefault() { 107 if (IncludeFlag == "") return true; 108 if ((ExcludeFlag != "") && (ExcludeOption.getPosition() < IncludeOption.getPosition())) return true; 109 if ((ExcludeDirFlag != "") && (ExcludeDirOption.getPosition() < IncludeOption.getPosition())) return true; 110 return false; 111 } 112 113 114 125 re::RE * includeSpec = re::RE_Parser::parse(IncludeFlag, re::DEFAULT_MODE, re::RE_Syntax::FileGLOB); 126 includeSpec = anchorToFullFileName(includeSpec); 127 return includeSpec; 128 } else { 129 return re::makeEnd(); // matches every line. 130 } 131 } 132 115 133 namespace fs = boost::filesystem; 116 134 117 // This is a stub, to be expanded later. 
118 bool excludeDirectory(fs::path dirpath) { return dirpath.filename() == ".svn";} 119 120 // Determine whether to skip a path based on -D skip or -d skip settings. 121 bool skip_path(fs::path p) { 122 switch (fs::status(p).type()) { 123 case fs::directory_file: return DirectoriesFlag == Skip; 124 case fs::block_file: 125 case fs::character_file: 126 case fs::fifo_file: 127 case fs::socket_file: 128 return DevicesFlag == Skip; 129 default: 130 return false; 131 } 132 } 133 134 void getSubdirectoryFiles(fs::path dirpath, std::vector<fs::path> & collectedFiles) { 135 // 136 // Directory List: a set of directory paths that have been 137 // examined to identify candidate files for searching, together 138 // with a count of the number of candidate files in each directory. 139 // 140 // FileName Buffer: an ordered sequence of NUL terminated filenames 141 // for each candidate produced in the directory traversal. 142 // The first mFullPathEntries entries are CWD paths. Subsequent entries 143 // are base file names relative to a directory. The set 144 // of all entries for a given directory are consecutive in the 145 // buffer, and the sets are ordered consecutively by directory 146 // index in the Directory List. 147 // 148 // CollectedPaths: a vector of file paths to which the 149 // selected files are added. 
150 151 class FileSelectAccumulator : public grep::MatchAccumulator { 152 public: 153 FileSelectAccumulator(std::vector<fs::path> & collectedPaths) : 154 mCollectedPaths(collectedPaths), 155 mFullPathEntries(0) 156 {} 157 void setFullPathEntries(unsigned entries) {mFullPathEntries = entries; mDirectoryIndex = 0;} 158 void reset(); 159 void addDirectory(fs::path dirPath, unsigned cumulativeEntryCount); 160 void accumulate_match(const size_t lineNum, char * line_start, char * line_end) override; 161 protected: 162 std::vector<fs::path> & mCollectedPaths; 163 unsigned mFullPathEntries; 164 unsigned mDirectoryIndex; 165 std::vector<fs::path> mDirectoryList; 166 std::vector<unsigned> mCumulativeEntryCount; 167 }; 168 169 void FileSelectAccumulator::reset() { 170 mCollectedPaths.clear(); 171 mFullPathEntries = 0; 172 mDirectoryIndex = 0; 173 mDirectoryList.clear(); 174 mCumulativeEntryCount.clear(); 175 } 176 177 void FileSelectAccumulator::addDirectory(fs::path dirPath, unsigned cumulativeEntryCount) { 178 mDirectoryList.push_back(dirPath); 179 mCumulativeEntryCount.push_back(cumulativeEntryCount); 180 } 181 182 void FileSelectAccumulator::accumulate_match(const size_t fileIdx, char * name_start, char * name_end) { 183 fs::path p(std::string(name_start, name_end - name_start)); 184 if (fileIdx < mFullPathEntries) { 185 mCollectedPaths.push_back(p); 186 } else { 187 assert(mDirectoryIndex < mDirectoryList.size()); 188 while (fileIdx >= mCumulativeEntryCount[mDirectoryIndex]) { 189 mDirectoryIndex++; 190 } 191 mCollectedPaths.emplace_back(mDirectoryList[mDirectoryIndex]/std::string(name_start, name_end - name_start)); 192 } 193 } 194 195 std::vector<fs::path> getFullFileList(cl::list<std::string> & inputFiles) { 196 // The vector to accumulate the full list of collected files to be searched. 
197 std::vector<fs::path> collectedPaths; 198 FileSelectAccumulator fileAccum(collectedPaths); 199 200 // In this pass through command line arguments and the file hierarchy, 201 // we are just gathering file and subdirectory entries, so we silently 202 // ignore errors. We use the boost::filesystem operations that set 203 // error codes rather than raise exceptions. 135 204 boost::system::error_code errc; 136 fs::directory_iterator di(dirpath, errc); 137 fs::directory_iterator di_end; 138 if (errc) { 139 // If we cannot enter the directory, keep it in the list of files. 140 collectedFiles.push_back(dirpath); 141 return; 142 } 143 //FileAccumulator accum(dirpath, collectedFiles); 144 while (di != di_end) { 145 auto & e = di->path(); 146 if (fs::is_directory(e)) { 147 if (fs::is_symlink(e) && !DereferenceRecursiveFlag) { 148 continue; 149 } 150 if (!excludeDirectory(e)) { 151 getSubdirectoryFiles(e, collectedFiles); 152 } 153 } else { 154 if (!skip_path(e)) { 155 collectedFiles.push_back(e); 156 } 157 } 158 di.increment(errc); 159 if (errc) { 160 collectedFiles.push_back(e); 161 } 162 } 163 } 164 165 std::vector<fs::path> getFullFileList(cl::list<std::string> & inputFiles) { 166 std::vector<fs::path> expanded_paths; 167 boost::system::error_code errc; 205 206 // At each level we gather candidate file and directory names and then 207 // filter the names based on -include, -exclude, -include-dir, -exclude-dir, 208 // and -exclude-from settings. 209 // 210 grep::SearchableBuffer dirCandidates; 211 grep::SearchableBuffer fileCandidates; 212 213 // First level of processing: command line files and directories. 168 214 for (const std::string & f : inputFiles) { 169 if (f == "-") { 215 if (f == "-") { // stdin, will always be searched. 
170 216 argv::UseStdIn = true; 171 217 continue; 172 218 } 173 219 fs::path p(f); 174 if (skip_path(p)) { 175 continue; 220 if (errc) { 221 // If there was an error, we leave the file in the fileCandidates 222 // list for later error processing. 223 fileCandidates.addSearchCandidate(p.c_str()); 224 } else if (fs::is_directory(p)) { 225 if (DirectoriesFlag == Recurse) { 226 dirCandidates.addSearchCandidate(p.c_str()); 227 } else if (DirectoriesFlag == Read) { 228 fileCandidates.addSearchCandidate(p.c_str()); 229 } 230 } else if (fs::is_regular_file(p)) { 231 fileCandidates.addSearchCandidate(p.c_str()); 232 } else { 233 // Devices and unknown file types 234 if (DevicesFlag == Read) { 235 fileCandidates.addSearchCandidate(p.c_str()); 236 } 176 237 } 177 if (LLVM_UNLIKELY((DirectoriesFlag == Recurse) && fs::is_directory(p))) { 178 if (!excludeDirectory(p)) { 179 getSubdirectoryFiles(p, expanded_paths); 238 } 239 240 auto commandLineDirCandidates = dirCandidates.getCandidateCount(); 241 auto commandLineFileCandidates = fileCandidates.getCandidateCount(); 242 fileAccum.setFullPathEntries(commandLineFileCandidates); 243 if (commandLineDirCandidates > 0) { 244 // Recursive processing of directories has been requested and we have 245 // candidate directories from the command line. 246 247 // selectedDirectories will hold the results of directory 248 // include/exclude filtering at each level of processing. 249 std::vector<fs::path> selectedDirectories; 250 251 FileSelectAccumulator directoryAccum(selectedDirectories); 252 grep::InternalSearchEngine directorySelectEngine; 253 directorySelectEngine.setRecordBreak(grep::GrepRecordBreakKind::Null); 254 directorySelectEngine.grepCodeGen 255 (getDirectoryIncludePattern(), getDirectoryExcludePattern(), & directoryAccum); 256 257 // The initial grep search determines which of the command line directories to process. 258 // Each of these candidates is a full path returned from command line argument processing. 
259 directoryAccum.setFullPathEntries(dirCandidates.getCandidateCount()); 260 directorySelectEngine.doGrep(dirCandidates.getBufferBase(), dirCandidates.getBufferSize()); 261 262 while (!selectedDirectories.empty()) { 263 // We now iterate through the full list of directories, gathering 264 // entries from each. 265 // (a) File entries are added into the global list of fileCandidates. 266 // (b) Directory entries are added into a new list of candidates at each level. 267 268 grep::SearchableBuffer subdirCandidates; 269 std::vector<fs::path> currentDirectories = selectedDirectories; 270 directoryAccum.reset(); 271 // Iterate through all directories, collecting subdirectory and file candidates. 272 for (auto & dirpath : currentDirectories) { 273 boost::system::error_code errc; 274 fs::directory_iterator di_end; 275 fs::directory_iterator di(dirpath, errc); 276 if (errc) { 277 // If we cannot enter the directory, keep it in the list of files, 278 // for possible error reporting. 279 fileCandidates.addSearchCandidate(dirpath.filename().c_str()); 280 continue; 281 } 282 while (di != di_end) { 283 auto & e = di->path(); 284 if (fs::is_directory(e)) { 285 if (fs::is_symlink(e) && !DereferenceRecursiveFlag) { 286 di.increment(errc); 287 continue; 288 } 289 subdirCandidates.addSearchCandidate(e.filename().c_str()); 290 } else if (fs::is_regular_file(e)) { 291 fileCandidates.addSearchCandidate(e.filename().c_str()); 292 } else { 293 // Devices and unknown file types 294 if (DevicesFlag == Read) { 295 fileCandidates.addSearchCandidate(e.filename().c_str()); 296 } 297 } 298 di.increment(errc); 299 if (errc) break; 300 } 301 // For each directory, update counts for candidates generated at this level. 
302 // 303 directoryAccum.addDirectory(dirpath, subdirCandidates.getCandidateCount()); 304 fileAccum.addDirectory(dirpath, fileCandidates.getCandidateCount()); 180 305 } 181 } else { 182 expanded_paths.push_back(p); 183 } 184 } 185 return expanded_paths; 186 } 187 188 } 306 // Directory traversal at this level is complete. Clear selectedDirectories, 307 // so that it will accumulate only the selected entries from the gathered 308 // buffer of subdirCandidates. 309 selectedDirectories.clear(); 310 // 311 // Now do the search to produce the next level of selected subdirectories 312 directorySelectEngine.doGrep(subdirCandidates.getBufferBase(), subdirCandidates.getBufferSize()); 313 // The search result has been written to selectedDirectories, continue while we 314 // have new subdirectories. 315 } while (!selectedDirectories.empty()); 316 } 317 // All directories have been processed and all the fileCandidates are in the SearchableBuffer. 318 // Now determine which of the candidates should be included or excluded from the search. 319 // The results will be accumulated in collectedPaths. 320 grep::InternalSearchEngine fileSelectEngine; 321 fileSelectEngine.setRecordBreak(grep::GrepRecordBreakKind::Null); 322 fileSelectEngine.grepCodeGen 323 (getFileIncludePattern(), getFileExcludePattern(), & fileAccum); 324 fileSelectEngine.doGrep(fileCandidates.getBufferBase(), fileCandidates.getBufferSize()); 325 return collectedPaths; 326 } 327 328 }
Note: See TracChangeset
for help on using the changeset viewer.