Changeset 6229 for icGREP


Ignore:
Timestamp:
Dec 14, 2018, 6:13:54 PM (5 months ago)
Author:
nmedfort
Message:

Potential stall fix for -include=7*

Location:
icGREP/icgrep-devel/icgrep
Files:
1 added
5 edited

Legend:

Unmodified
Added
Removed
  • icGREP/icgrep-devel/icgrep/grep/grep_engine.cpp

    r6221 r6229  
    5353#include <llvm/Support/raw_ostream.h>
    5454#include <util/file_select.h>
    55 #include <util/aligned_allocator.h>
    5655#include <sys/stat.h>
    5756#include <fcntl.h>
     
    6362#include <kernels/pipeline_builder.h>
    6463#include <sched.h>
     64#include <atomic>
    6565
    6666using namespace llvm;
     
    101101    assert ("sizeof(size_t) == sizeof(long)" && sizeof(size_t) == sizeof(long));
    102102    return (sizeof(size_t) * CHAR_BIT) - __builtin_clzl(v - 1UL);
    103 }
    104 
    105 void SearchableBuffer::addSearchCandidate(const char * C_string_ptr) {
    106     size_t length = strlen(C_string_ptr)+1;
    107     if (mSpace_used + length >= mAllocated_capacity) {
    108         size_t new_capacity = size_t{1} << (ceil_log2(mSpace_used + length + 1));
    109         AlignedAllocator<char, BUFFER_ALIGNMENT> alloc;
    110         char * new_buffer = mAllocator.allocate(new_capacity, 0);
    111         memcpy(new_buffer, mBuffer_base, mSpace_used);
    112         memset(&new_buffer[mSpace_used], 0, new_capacity-mSpace_used);
    113         if (mBuffer_base != mInitial_buffer) {
    114             alloc.deallocate(mBuffer_base, 0);
    115         }
    116         mBuffer_base = new_buffer;
    117         mAllocated_capacity = new_capacity;
    118     }
    119     memcpy((void * ) &mBuffer_base[mSpace_used], C_string_ptr, length);
    120     mSpace_used += length;
    121     assert("Search candidate not null terminated" && (mBuffer_base[mSpace_used] == '\0'));
    122     mEntries++;
    123 }
    124 
    125 SearchableBuffer::SearchableBuffer() :
    126     mAllocated_capacity(INITIAL_CAPACITY),
    127     mSpace_used(0),
    128     mEntries(0),
    129     mBuffer_base(mInitial_buffer) {
    130     memset(mBuffer_base, 0, INITIAL_CAPACITY);
    131 }
    132 
    133 SearchableBuffer::~SearchableBuffer() {
    134     if (mBuffer_base != mInitial_buffer) {
    135         mAllocator.deallocate(mBuffer_base, 0);
    136     }
    137103}
    138104
     
    197163}
    198164
    199 void GrepEngine::initFileResult(std::vector<boost::filesystem::path> & paths) {
     165void GrepEngine::initFileResult(const std::vector<boost::filesystem::path> & paths) {
    200166    const unsigned n = paths.size();
    201167    mResultStrs.resize(n);
     
    286252    Component internalComponents = Component::NoComponents;
    287253
    288    
     254
    289255
    290256    if (isSimple) {
     
    722688
    723689    unsigned printIdx = mNextFileToPrint++;
    724     while (printIdx < inputPaths.size()) {
    725         const bool readyToPrint = ((printIdx == 0) || (mFileStatus[printIdx - 1] == FileStatus::PrintComplete)) && (mFileStatus[printIdx] == FileStatus::GrepComplete);
    726         if (readyToPrint) {
    727             const auto output = mResultStrs[printIdx].str();
    728             if (!output.empty()) {
    729                 llvm::outs() << output;
    730             }
    731             mFileStatus[printIdx] = FileStatus::PrintComplete;
    732             printIdx = mNextFileToPrint++;
    733         }
    734         //sched_yield();
    735     }
    736 
    737     if (pthread_self() != mEngineThread) {
    738         pthread_exit(nullptr);
    739     } else {
     690    if (printIdx == 0) {
     691
     692        while (printIdx < inputPaths.size()) {
     693            const bool readyToPrint = (mFileStatus[printIdx] == FileStatus::GrepComplete);
     694            if (readyToPrint) {
     695                const auto output = mResultStrs[printIdx].str();
     696                if (!output.empty()) {
     697                    llvm::outs() << output;
     698                }
     699                mFileStatus[printIdx] = FileStatus::PrintComplete;
     700                printIdx = mNextFileToPrint++;
     701            } else {
     702                sched_yield();
     703            }
     704        }
     705
    740706        if (mGrepStdIn) {
    741707            std::ostringstream s;
     
    744710            if (grepResult) grepMatchFound = true;
    745711        }
     712    }
     713    if (pthread_self() != mEngineThread) {
     714        pthread_exit(nullptr);
    746715    }
    747716    return nullptr;
  • icGREP/icgrep-devel/icgrep/grep/grep_engine.h

    r6213 r6229  
    1414#include <sstream>
    1515#include <atomic>
    16 #include <util/aligned_allocator.h>
    1716#include <boost/filesystem.hpp>
    1817
     
    2625
    2726namespace grep {
    28    
     27
    2928enum class GrepRecordBreakKind {Null, LF, Unicode};
    3029
     
    4241    bool mBinaryFile;
    4342};
    44    
     43
    4544class MatchAccumulator : public GrepCallBackObject {
    4645public:
     
    6564
    6665    virtual ~GrepEngine() = 0;
    67    
     66
    6867    void setPreferMMap() {mPreferMMap = true;}
    69    
     68
    7069    void showFileNames() {mShowFileNames = true;}
    7170    void setStdinLabel(std::string lbl) {mStdinLabel = lbl;}
     
    8180    void setBinaryFilesOption(argv::BinaryFilesMode mode) {mBinaryFilesMode = mode;}
    8281    void setRecordBreak(GrepRecordBreakKind b);
    83     void initFileResult(std::vector<boost::filesystem::path> & filenames);
     82    void initFileResult(const std::vector<boost::filesystem::path> & filenames);
    8483    void initREs(std::vector<re::RE *> & REs);
    8584    virtual void grepCodeGen();
     
    133132    bool grepMatchFound;
    134133    GrepRecordBreakKind mGrepRecordBreak;
    135    
     134
    136135    std::vector<re:: RE *> mREs;
    137136    std::set<re::Name *> mUnicodeProperties;
     
    197196};
    198197
    199    
    200    
     198
     199
    201200class InternalSearchEngine {
    202201public:
     
    206205
    207206    ~InternalSearchEngine();
    208    
     207
    209208    void setRecordBreak(GrepRecordBreakKind b) {mGrepRecordBreak = b;}
    210209    void setCaseInsensitive()  {mCaseInsensitive = true;}
    211    
     210
    212211    void grepCodeGen(re::RE * matchingRE, re::RE * invertedRE);
    213    
     212
    214213    void doGrep(const char * search_buffer, size_t bufferLength, MatchAccumulator & accum);
    215    
     214
    216215private:
    217216    GrepRecordBreakKind mGrepRecordBreak;
     
    221220    void * mMainMethod;
    222221};
    223    
    224    
    225 #define MAX_SIMD_WIDTH_SUPPORTED 512
    226 #define INITIAL_CAPACITY (MAX_SIMD_WIDTH_SUPPORTED * 4)
    227    
    228 class SearchableBuffer  {
    229 public:
    230     SearchableBuffer();
    231     void addSearchCandidate(const char * string_ptr);
    232     size_t getCandidateCount() {return mEntries;}
    233     char * getBufferBase() {return mBuffer_base;}
    234     size_t getBufferSize() {return mSpace_used;}
    235     ~SearchableBuffer();
    236 private:
    237     static const unsigned BUFFER_ALIGNMENT = MAX_SIMD_WIDTH_SUPPORTED/8;
    238     AlignedAllocator<char, BUFFER_ALIGNMENT> mAllocator;
    239     size_t mAllocated_capacity;
    240     size_t mSpace_used;
    241     size_t mEntries;
    242     char * mBuffer_base;
    243     alignas(BUFFER_ALIGNMENT) char mInitial_buffer[INITIAL_CAPACITY];
    244 };
    245222
    246223}
  • icGREP/icgrep-devel/icgrep/icgrep.cpp

    r6184 r6229  
    4444
    4545std::vector<re::RE *> readExpressions() {
    46  
     46
    4747    if (argv::FileFlag != "") {
    4848        std::ifstream regexFile(argv::FileFlag.c_str());
     
    5555        }
    5656    }
    57    
     57
    5858    // if there are no regexes specified through -e or -f, the first positional argument
    5959    // must be a regex, not an input file.
    60    
     60
    6161    if (argv::RegexpVector.size() == 0) {
    6262        argv::RegexpVector.push_back(inputFiles[0]);
     
    7373    }
    7474
    75    
     75
    7676    // If there are multiple REs, combine them into groups.
    7777    // A separate kernel will be created for each group.
     
    115115
    116116    argv::InitializeCommandLineInterface(argc, argv);
    117    
     117
    118118    auto REs = readExpressions();
    119119
    120     std::vector<fs::path> allFiles = argv::getFullFileList(inputFiles);
     120    const auto allFiles = argv::getFullFileList(inputFiles);
    121121    if (inputFiles.empty()) {
    122122        argv::UseStdIn = true;
     
    126126
    127127    CPUDriver driver("icgrep");
    128 
    129128    std::unique_ptr<grep::GrepEngine> grep;
    130 
    131129    switch (argv::Mode) {
    132130        case argv::NormalMode:
     
    168166    grep->initREs(REs);
    169167    grep->grepCodeGen();
    170     grep->initFileResult(allFiles);
     168    grep->initFileResult(allFiles); // unnecessary copy!
    171169    const bool matchFound = grep->searchAllFiles();
    172170
  • icGREP/icgrep-devel/icgrep/icgrep.files

    r6228 r6229  
     1CMakeLists.txt
     2grep/searchable_buffer.h
    13wc.cpp
    24base64.cpp
  • icGREP/icgrep-devel/icgrep/util/file_select.cpp

    r6184 r6229  
    2121#include <re/printer_re.h>
    2222#include <grep/grep_engine.h>
     23#include <grep/searchable_buffer.h>
    2324#include <toolchain/cpudriver.h>
    2425#include <fstream>
    2526#include <string>
     27#include <re/printer_re.h>
    2628
    2729using namespace llvm;
    2830
    2931namespace argv {
    30    
     32
    3133static cl::OptionCategory Input_Options("File Selection Options", "These options control the input sources.");
    3234
     
    125127    return anchorToFullFileName(re::makeAlt(patterns.begin(), patterns.end()));
    126128}
    127    
     129
    128130re::RE * getFileIncludePattern() {
    129131    if (IncludeFlag != "") {
     
    171173    std::vector<unsigned> mCumulativeEntryCount;
    172174};
    173    
     175
    174176void FileSelectAccumulator::reset() {
    175177    mCollectedPaths.clear();
     
    198200}
    199201
    200 
    201 
    202    
    203202std::vector<fs::path> getFullFileList(cl::list<std::string> & inputFiles) {
    204203    // The vector to accumulate the full list of collected files to be searched.
    205204    std::vector<fs::path> collectedPaths;
    206    
     205
    207206    // In this pass through command line arguments and the file hierarchy,
    208207    // we are just gathering file and subdirectory entries, so we silently
    209208    // ignore errors.  We use the boost::filesystem operations that set
    210209    // error codes rather than raise exceptions.
    211     boost::system::error_code errc;
    212    
     210
    213211    // In non-recursive greps with no include/exclude processing, we simply assemble the
    214212    // paths.
    215     if ((DirectoriesFlag != Recurse) && (ExcludeFlag == "") && (IncludeFlag == "") && (ExcludeFromFlag == "")) {
     213    if ((DirectoriesFlag != Recurse) && (ExcludeFlag.empty()) && (IncludeFlag.empty()) && (ExcludeFromFlag.empty())) {
    216214        for (const std::string & f : inputFiles) {
    217215            if (f == "-") {  // stdin, will always be searched.
     
    220218            }
    221219            fs::path p(f);
     220            boost::system::error_code errc;
    222221            fs::file_status s = fs::status(p, errc);
    223222            if (errc) {
     
    240239        return collectedPaths;
    241240    }
    242    
     241
    243242    // Otherwise we need to filter paths according to some include/exclude rules.
    244    
     243
    245244    FileSelectAccumulator fileAccum(collectedPaths);
    246    
     245
    247246    // At each level we gather candidate file and directory names and then
    248247    // filter the names based on -include, -exclude, -include-dir, -exclude-dir,
     
    259258        }
    260259        fs::path p(f);
     260        boost::system::error_code errc;
    261261        fs::file_status s = fs::status(p, errc);
    262262        if (errc) {
    263263            // If there was an error, we leave the file in the fileCandidates
    264264            // list for later error processing.
    265             if (!NoMessagesFlag) fileCandidates.addSearchCandidate(p.c_str());
     265            if (!NoMessagesFlag) fileCandidates.append(p.string());
    266266        } else if (fs::is_directory(s)) {
    267267            if (DirectoriesFlag == Recurse) {
    268                 dirCandidates.addSearchCandidate(p.c_str());
     268                dirCandidates.append(p.string());
    269269            } else if (DirectoriesFlag == Read) {
    270                 fileCandidates.addSearchCandidate(p.c_str());
     270                fileCandidates.append(p.string());
    271271            }
    272272        } else if (fs::is_regular_file(s)) {
    273             fileCandidates.addSearchCandidate(p.c_str());
     273            fileCandidates.append(p.string());
    274274        } else {
    275275            // Devices and unknown file types
    276276            if (DevicesFlag == Read) {
    277                 fileCandidates.addSearchCandidate(p.c_str());
    278             }
    279         }
    280     }
    281    
     277                fileCandidates.append(p.string());
     278            }
     279        }
     280    }
     281
    282282    auto commandLineDirCandidates = dirCandidates.getCandidateCount();
    283283    auto commandLineFileCandidates = fileCandidates.getCandidateCount();
     
    286286        // Recursive processing of directories has been requested and we have
    287287        // candidate directories from the command line.
    288    
     288
    289289        // selectedDirectories will hold the results of directory
    290290        // include/exclude filtering at each level of processing.
    291291        std::vector<fs::path> selectedDirectories;
    292        
    293292        FileSelectAccumulator directoryAccum(selectedDirectories);
     293
    294294        CPUDriver driver("driver");
    295295        grep::InternalSearchEngine directorySelectEngine(driver);
    296296        directorySelectEngine.setRecordBreak(grep::GrepRecordBreakKind::Null);
    297297        directorySelectEngine.grepCodeGen(getDirectoryIncludePattern(), getDirectoryExcludePattern());
    298        
     298
    299299        // The initial grep search determines which of the command line directories to process.
    300300        // Each of these candidates is a full path return from command line argument processing.
    301301        directoryAccum.setFullPathEntries(dirCandidates.getCandidateCount());
    302         directorySelectEngine.doGrep(dirCandidates.getBufferBase(), dirCandidates.getBufferSize(), directoryAccum);
     302        directorySelectEngine.doGrep(dirCandidates.data(), dirCandidates.size(), directoryAccum);
     303        grep::SearchableBuffer subdirCandidates;
    303304
    304305        while (!selectedDirectories.empty()) {
     
    308309            // (b) Directory entries are added into a new list of candidates at each level.
    309310
    310             grep::SearchableBuffer subdirCandidates;
    311             std::vector<fs::path> currentDirectories = selectedDirectories;
     311            std::vector<fs::path> currentDirectories;
     312            assert (currentDirectories.empty());
     313            currentDirectories.swap(selectedDirectories);
     314            assert (selectedDirectories.empty());
     315            assert (!currentDirectories.empty());
     316
     317            subdirCandidates.reset();
    312318            directoryAccum.reset();
     319
    313320            // Iterate through all directories, collecting subdirectory and file candidates.
    314             for (auto & dirpath : currentDirectories) {
     321            for (const auto & dirpath : currentDirectories) {
    315322                boost::system::error_code errc;
    316                 fs::directory_iterator di_end;
    317323                fs::directory_iterator di(dirpath, errc);
    318324                if (errc) {
    319325                    // If we cannot enter the directory, keep it in the list of files,
    320326                    // for possible error reporting.
    321                     if (!NoMessagesFlag) fileCandidates.addSearchCandidate(dirpath.filename().c_str());
     327                    if (!NoMessagesFlag) {
     328                        fileCandidates.append(dirpath.filename().string());
     329                    }
    322330                    continue;
    323331                }
     332
     333                const auto di_end = fs::directory_iterator();
    324334                while (di != di_end) {
    325                     auto & e = di->path();
    326                     fs::file_status s = fs::status(e, errc);
     335                    const auto & e = di->path();
     336                    boost::system::error_code errc;
     337                    const auto s = fs::status(e, errc);
    327338                    if (errc) {
    328339                        // If there was an error, we leave the file in the fileCandidates
    329340                        // list for later error processing.
    330                         if (!NoMessagesFlag) fileCandidates.addSearchCandidate(e.filename().c_str());
     341                        if (!NoMessagesFlag) {
     342                            fileCandidates.append(e.filename().string());
     343                        }
    331344                    } else if (fs::is_directory(s)) {
    332345                        if (fs::is_symlink(s) && !DereferenceRecursiveFlag) {
     
    334347                            continue;
    335348                        }
    336                         subdirCandidates.addSearchCandidate(e.filename().c_str());
    337                     } else if (fs::is_regular_file(s)) {
    338                         fileCandidates.addSearchCandidate(e.filename().c_str());
    339                     } else {
    340                         // Devices and unknown file types
    341                         if (DevicesFlag == Read) {
    342                             fileCandidates.addSearchCandidate(e.filename().c_str());
    343                         }
     349                        subdirCandidates.append(e.filename().string());
     350                    } else if (fs::is_regular_file(s) || DevicesFlag == Read) {
     351                        fileCandidates.append(e.filename().string());
    344352                    }
    345                     di.increment(errc);
    346                     if (errc) break;
     353                    boost::system::error_code errc2;
     354                    di.increment(errc2);
     355                    if (errc2) break;
    347356                }
    348357                // For each directory, update counts for candidates generated at this level.
     
    351360                fileAccum.addDirectory(dirpath, fileCandidates.getCandidateCount());
    352361            }
    353             // Directory traversal at this level is complete.  Clear the directoryList,
    354             // so that it will accumulate only the selected entries from the gathered
    355             // buffer of subdirCandidates.
    356             selectedDirectories.clear();
    357             //
    358             //  Now do the search to produce the next level of selected subdirectories
    359             directorySelectEngine.doGrep(subdirCandidates.getBufferBase(), subdirCandidates.getBufferSize(), directoryAccum);
     362
     363
     364            // Now do the search to produce the next level of selected subdirectories
     365            directorySelectEngine.doGrep(subdirCandidates.data(), subdirCandidates.size(), directoryAccum);
    360366            // The search result has been written to directoryList, continue while we
    361367            // have new subdirectories.
    362         } while (!selectedDirectories.empty());
     368        }
    363369    }
    364370    //  All directories have been processed and all the fileCandidates are in the SearchBuffer.
    365371    //  Now determine which of the candidates should be included or excluded from the search.
    366372    //  The results will be accumulated in collectedPaths.
     373
    367374    CPUDriver driver("driver");
    368375    grep::InternalSearchEngine fileSelectEngine(driver);
    369376    fileSelectEngine.setRecordBreak(grep::GrepRecordBreakKind::Null);
    370377    fileSelectEngine.grepCodeGen(getFileIncludePattern(), getFileExcludePattern());
    371     fileSelectEngine.doGrep(fileCandidates.getBufferBase(), fileCandidates.getBufferSize(), fileAccum);
     378    fileSelectEngine.doGrep(fileCandidates.data(), fileCandidates.size(), fileAccum);
    372379    return collectedPaths;
    373380}
Note: See TracChangeset for help on using the changeset viewer.