source: icGREP/icgrep-devel/icgrep/grep/grep_engine.cpp @ 5998

Last change on this file since 5998 was 5998, checked in by nmedfort, 12 months ago

Added temporary buffer functionality to the pipeline for single stream source buffers. Fixed memory leak from UCD::UnicodeBreakRE()

File size: 40.1 KB
Line 
1/*
2 *  Copyright (c) 2018 International Characters.
3 *  This software is licensed to the public under the Open Software License 3.0.
4 *  icgrep is a trademark of International Characters.
5 */
6#include <set>
7#include "grep_engine.h"
8#include <llvm/IR/Module.h>
9#include <boost/filesystem.hpp>
10#include <UCD/resolve_properties.h>
11#include <kernels/charclasses.h>
12#include <kernels/cc_kernel.h>
13#include <kernels/grep_kernel.h>
14#include <kernels/UCD_property_kernel.h>
15#include <kernels/grapheme_kernel.h>
16#include <kernels/linebreak_kernel.h>
17#include <kernels/streams_merge.h>
18#include <kernels/source_kernel.h>
19#include <kernels/s2p_kernel.h>
20#include <kernels/scanmatchgen.h>
21#include <kernels/streamset.h>
22#include <kernels/until_n.h>
23#include <kernels/kernel_builder.h>
24#include <pablo/pablo_kernel.h>
25#include <cc/alphabet.h>
26#include <re/re_cc.h>
27#include <re/re_alt.h>
28#include <re/re_end.h>
29#include <re/re_name.h>
30#include <re/casing.h>
31#include <re/exclude_CC.h>
32#include <re/to_utf8.h>
33#include <re/re_toolchain.h>
34#include <toolchain/toolchain.h>
35#include <re/re_analysis.h>
36#include <re/re_name_resolve.h>
37#include <re/re_name_gather.h>
38#include <re/collect_ccs.h>
39#include <re/replaceCC.h>
40#include <re/re_multiplex.h>
41#include <re/grapheme_clusters.h>
42#include <re/re_utility.h>
43#include <re/printer_re.h>
44#include <toolchain/toolchain.h>
45#include <toolchain/cpudriver.h>
46#include <iostream>
47#include <cc/multiplex_CCs.h>
48#include <llvm/Support/raw_ostream.h>
49#include <util/file_select.h>
50#include <util/aligned_allocator.h>
51#include <sys/stat.h>
52#include <fcntl.h>
53#include <errno.h>
54#include <llvm/ADT/STLExtras.h> // for make_unique
55#include <llvm/Support/CommandLine.h>
56#include <llvm/Support/Debug.h>
57#include <llvm/Support/Casting.h>
58#include <sched.h>
59
60using namespace parabix;
61using namespace llvm;
62using namespace cc;
63using namespace kernel;
64
// Command-line options (llvm::cl) consulted while building and running the grep pipeline.

// Requested number of worker threads; searchAllFiles() additionally caps this by the number of input paths.
static cl::opt<int> Threads("t", cl::desc("Total number of threads."), cl::init(2));
// When set, grepPipeline() instantiates S2P_PabloKernel rather than S2PKernel for transposition.
static cl::opt<bool> PabloTransposition("enable-pablo-s2p", cl::desc("Enable experimental pablo transposition."));
// When set, grepPipeline() may build a MultiplexedAlphabet and a CharClassesKernel per regular expression.
static cl::opt<bool> CC_Multiplexing("CC-multiplexing", cl::desc("Enable CC multiplexing."), cl::init(false));
// When set, grepPipeline() adds one UnicodePropertyKernelBuilder kernel per gathered Unicode property.
static cl::opt<bool> PropertyKernels("enable-property-kernels", cl::desc("Enable Unicode property kernels."), cl::init(false));
// When set, the byte-oriented branches of grepPipeline() add a separate DirectCharacterClassKernelBuilder kernel per CC.
static cl::opt<bool> MultithreadedSimpleRE("enable-simple-RE-kernels", cl::desc("Enable individual CC kernels for simple REs."), cl::init(false));
// Default value for the byte-CC-limit option below.
const unsigned DefaultByteCClimit = 6;

// Limit handed to byteTestsWithinLimit() / hasTriCCwithinLimit() when choosing a byte-oriented pipeline.
static cl::opt<unsigned> ByteCClimit("byte-CC-limit", cl::desc("Max number of CCs for byte CC pipeline."), cl::init(DefaultByteCClimit));
73
74
75namespace grep {
76   
77extern "C" void signal_dispatcher(intptr_t callback_object_addr, unsigned signal) {
78    reinterpret_cast<GrepCallBackObject *>(callback_object_addr)->handle_signal(signal);
79}
80   
81void GrepCallBackObject::handle_signal(unsigned s) {
82    if (static_cast<GrepSignal>(s) == GrepSignal::BinaryFile) {
83        mBinaryFile = true;
84    } else {
85        llvm::report_fatal_error("Unknown GrepSignal");
86    }
87}
88
89extern "C" void accumulate_match_wrapper(intptr_t accum_addr, const size_t lineNum, char * line_start, char * line_end) {
90    reinterpret_cast<MatchAccumulator *>(accum_addr)->accumulate_match(lineNum, line_start, line_end);
91}
92
93extern "C" void finalize_match_wrapper(intptr_t accum_addr, char * buffer_end) {
94    reinterpret_cast<MatchAccumulator *>(accum_addr)->finalize_match(buffer_end);
95}
96   
97
// Returns ceil(log2(v)), i.e. the smallest k such that (1 << k) >= v.
// v must be non-zero.
inline static size_t ceil_log2(const size_t v) {
    assert ("log2(0) is undefined!" && v != 0);
    // v == 1 would ask the builtin to count the leading zeros of 0, which is undefined.
    if (v == 1) {
        return 0;
    }
    // Use the unsigned long long builtin so the result does not depend on
    // sizeof(size_t) == sizeof(long).
    const unsigned long long predecessor = static_cast<unsigned long long>(v) - 1ULL;
    const size_t widthInBits = sizeof(unsigned long long) * CHAR_BIT;
    return widthInBits - static_cast<size_t>(__builtin_clzll(predecessor));
}
103
104void SearchableBuffer::addSearchCandidate(const char * C_string_ptr) {
105    size_t length = strlen(C_string_ptr)+1;
106    if (mSpace_used + length >= mAllocated_capacity) {
107        size_t new_capacity = size_t{1} << (ceil_log2(mSpace_used + length + 1));
108        AlignedAllocator<char, BUFFER_ALIGNMENT> alloc;
109        char * new_buffer = mAllocator.allocate(new_capacity, 0);
110        memcpy(new_buffer, mBuffer_base, mSpace_used);
111        memset(&new_buffer[mSpace_used], 0, new_capacity-mSpace_used);
112        if (mBuffer_base != mInitial_buffer) {
113            alloc.deallocate(mBuffer_base, 0);
114        }
115        mBuffer_base = new_buffer;
116        mAllocated_capacity = new_capacity;
117    }
118    memcpy((void * ) &mBuffer_base[mSpace_used], C_string_ptr, length);
119    mSpace_used += length;
120    assert("Search candidate not null terminated" && (mBuffer_base[mSpace_used] == '\0'));
121    mEntries++;
122}
123
124SearchableBuffer::SearchableBuffer() :
125    mAllocated_capacity(INITIAL_CAPACITY),
126    mSpace_used(0),
127    mEntries(0),
128    mBuffer_base(mInitial_buffer) {
129    memset(mBuffer_base, 0, INITIAL_CAPACITY);
130}
131
132SearchableBuffer::~SearchableBuffer() {
133    if (mBuffer_base != mInitial_buffer) {
134        mAllocator.deallocate(mBuffer_base, 0);
135    }
136}
137
138
139
140// Grep Engine construction and initialization.
141
142GrepEngine::GrepEngine() :
143    mSuppressFileMessages(false),
144    mBinaryFilesMode(argv::Text),
145    mPreferMMap(true),
146    mShowFileNames(false),
147    mStdinLabel("(stdin)"),
148    mShowLineNumbers(false),
149    mInitialTab(false),
150    mCaseInsensitive(false),
151    mInvertMatches(false),
152    mMaxCount(0),
153    mGrepStdIn(false),
154    mGrepDriver(make_unique<ParabixDriver>("engine")),
155    mNextFileToGrep(0),
156    mNextFileToPrint(0),
157    grepMatchFound(false),
158    mGrepRecordBreak(GrepRecordBreakKind::LF),
159    mMoveMatchesToEOL(true),
160    mEngineThread(pthread_self()) {}
161
162QuietModeEngine::QuietModeEngine() : GrepEngine() {
163    mEngineKind = EngineKind::QuietMode;
164    mMoveMatchesToEOL = false;
165    mMaxCount = 1;
166}
167
168MatchOnlyEngine::MatchOnlyEngine(bool showFilesWithMatch, bool useNullSeparators) :
169    GrepEngine(), mRequiredCount(showFilesWithMatch) {
170    mEngineKind = EngineKind::MatchOnly;
171    mFileSuffix = useNullSeparators ? std::string("\0", 1) : "\n";
172    mMoveMatchesToEOL = false;
173    mMaxCount = 1;
174    mShowFileNames = true;
175}
176
177CountOnlyEngine::CountOnlyEngine() : GrepEngine() {
178    mEngineKind = EngineKind::CountOnly;
179    mFileSuffix = ":";
180}
181
182EmitMatchesEngine::EmitMatchesEngine() : GrepEngine() {
183    mEngineKind = EngineKind::EmitMatches;
184    mFileSuffix = mInitialTab ? "\t:" : ":";
185}
186
187   
188void GrepEngine::setRecordBreak(GrepRecordBreakKind b) {
189    mGrepRecordBreak = b;
190}
191
192   
193
194   
195void GrepEngine::initFileResult(std::vector<boost::filesystem::path> & paths) {
196    const unsigned n = paths.size();
197    mResultStrs.resize(n);
198    mFileStatus.resize(n, FileStatus::Pending);
199    inputPaths = paths;
200}
201
202void GrepEngine::initREs(std::vector<re::RE *> & REs) {
203    if (mGrepRecordBreak == GrepRecordBreakKind::Unicode) {
204        mBreakCC = re::makeCC(re::makeCC(0x0A, 0x0D), re::makeCC(re::makeCC(0x85), re::makeCC(0x2028, 0x2029)));
205    } else if (mGrepRecordBreak == GrepRecordBreakKind::Null) {
206        mBreakCC = re::makeByte(0);  // Null
207    } else {
208        mBreakCC = re::makeByte(0x0A); // LF
209    }
210    re::RE * anchorRE = mBreakCC;
211    if (mGrepRecordBreak == GrepRecordBreakKind::Unicode) {
212        re::Name * anchorName = re::makeName("UTF8_LB", re::Name::Type::Unicode);
213        anchorName->setDefinition(re::makeUnicodeBreak());
214        anchorRE = anchorName;
215    }
216   
217    mREs = REs;
218    bool allAnchored = true;
219    for(unsigned i = 0; i < mREs.size(); ++i) {
220        if (!hasEndAnchor(mREs[i])) allAnchored = false;
221        mREs[i] = resolveModesAndExternalSymbols(mREs[i], mCaseInsensitive);
222        mREs[i] = re::exclude_CC(mREs[i], mBreakCC);
223        mREs[i] = resolveAnchors(mREs[i], anchorRE);
224        re::gatherUnicodeProperties(mREs[i], mUnicodeProperties);
225        mREs[i] = regular_expression_passes(mREs[i]);
226    }
227    if (allAnchored && (mGrepRecordBreak != GrepRecordBreakKind::Unicode)) mMoveMatchesToEOL = false;
228
229}
230
231
232   
233// Code Generation
234//
235// All engines share a common pipeline to compute a stream of Matches from a given input Bytestream.
236
237unsigned LLVM_READNONE calculateMaxCountRate(const std::unique_ptr<kernel::KernelBuilder> & b) {
238    const unsigned packSize = b->getSizeTy()->getBitWidth();
239    return (packSize * packSize) / b->getBitBlockWidth();
240}
241   
242std::pair<StreamSetBuffer *, StreamSetBuffer *> GrepEngine::grepPipeline(StreamSetBuffer * SourceStream, Value * callback_object_addr) {
243    auto & idb = mGrepDriver->getBuilder();
244    const unsigned segmentSize = codegen::SegmentSize;
245    const unsigned bufferSegments = codegen::BufferSegments * codegen::ThreadNum;
246    // TODO: until we automate stream buffer sizing, use this calculation to determine how large our matches buffer needs to be.
247    const unsigned baseBufferSize = segmentSize * (mMaxCount > 0 ? (std::max(bufferSegments, calculateMaxCountRate(idb))) : bufferSegments);
248    const unsigned encodingBits = 8;
249   
250   
251    //  Regular Expression Processing and Analysis Phase
252    const auto nREs = mREs.size();
253    bool hasGCB[nREs];
254    bool anyGCB = false;
255
256    for(unsigned i = 0; i < nREs; ++i) {
257        hasGCB[i] = hasGraphemeClusterBoundary(mREs[i]);
258        anyGCB |= hasGCB[i];
259    }
260    StreamSetBuffer * ByteStream = nullptr;
261    if (mBinaryFilesMode == argv::Text) {
262        ByteStream = SourceStream;
263    } else if (mBinaryFilesMode == argv::WithoutMatch) {
264        ByteStream = mGrepDriver->addBuffer<CircularBuffer>(idb, idb->getStreamSetTy(1, 8), baseBufferSize);
265        kernel::Kernel * binaryCheckK = mGrepDriver->addKernelInstance<kernel::AbortOnNull>(idb);
266        binaryCheckK->setInitialArguments({callback_object_addr});
267        mGrepDriver->makeKernelCall(binaryCheckK, {SourceStream}, {ByteStream});
268        mGrepDriver->LinkFunction(*binaryCheckK, "signal_dispatcher", &signal_dispatcher);
269    } else {
270        llvm::report_fatal_error("Binary mode not supported.");
271    }
272    StreamSetBuffer * LineBreakStream = mGrepDriver->addBuffer<CircularBuffer>(idb, idb->getStreamSetTy(1, 1), baseBufferSize);
273    std::vector<StreamSetBuffer *> MatchResultsBufs(nREs);
274   
275    re::RE * prefixRE;
276    re::RE * suffixRE;
277    // For simple regular expressions with a small number of characters, we
278    // can bypass transposition and use the Direct CC compiler.
279    bool isSimple = (nREs == 1) && (mGrepRecordBreak != GrepRecordBreakKind::Unicode) && (!anyGCB);
280    if (isSimple) {
281        mREs[0] = toUTF8(mREs[0]);
282    }
283    if (isSimple && byteTestsWithinLimit(mREs[0], ByteCClimit)) {
284        std::vector<std::string> externalStreamNames;
285        std::vector<StreamSetBuffer *> icgrepInputSets = {ByteStream};
286        if (MultithreadedSimpleRE && hasTriCCwithinLimit(mREs[0], ByteCClimit, prefixRE, suffixRE)) {
287            auto CCs = re::collectCCs(prefixRE, &cc::Byte);
288            for (auto cc : CCs) {
289                auto ccName = makeName(cc);
290                mREs[0] = re::replaceCC(mREs[0], cc, ccName);
291                std::string ccNameStr = ccName->getFullName();
292                StreamSetBuffer * ccStream = mGrepDriver->addBuffer<CircularBuffer>(idb, idb->getStreamSetTy(1, 1), baseBufferSize);
293                kernel::Kernel * ccK = mGrepDriver->addKernelInstance<kernel::DirectCharacterClassKernelBuilder>(idb, ccNameStr, std::vector<re::CC *>{cc});
294                mGrepDriver->makeKernelCall(ccK, {ByteStream}, {ccStream});
295                externalStreamNames.push_back(ccNameStr);
296                icgrepInputSets.push_back(ccStream);
297            }
298        }
299        StreamSetBuffer * MatchResults = mGrepDriver->addBuffer<CircularBuffer>(idb, idb->getStreamSetTy(1, 1), baseBufferSize);
300        kernel::Kernel * icgrepK = mGrepDriver->addKernelInstance<kernel::ByteGrepKernel>(idb, mREs[0], externalStreamNames);
301        mGrepDriver->makeKernelCall(icgrepK, icgrepInputSets, {MatchResults});
302        MatchResultsBufs[0] = MatchResults;
303        kernel::Kernel * breakK = mGrepDriver->addKernelInstance<kernel::DirectCharacterClassKernelBuilder>(idb, "breakCC", std::vector<re::CC *>{mBreakCC});
304        mGrepDriver->makeKernelCall(breakK, {ByteStream}, {LineBreakStream});
305    } else if (isSimple && hasTriCCwithinLimit(mREs[0], ByteCClimit, prefixRE, suffixRE)) {
306        std::vector<std::string> externalStreamNames;
307        std::vector<StreamSetBuffer *> icgrepInputSets = {ByteStream};
308        if (MultithreadedSimpleRE) {
309            auto CCs = re::collectCCs(prefixRE, &cc::Byte);
310            for (auto cc : CCs) {
311                auto ccName = makeName(cc);
312                mREs[0] = re::replaceCC(mREs[0], cc, ccName);
313                std::string ccNameStr = ccName->getFullName();
314                StreamSetBuffer * ccStream = mGrepDriver->addBuffer<CircularBuffer>(idb, idb->getStreamSetTy(1, 1), baseBufferSize);
315                kernel::Kernel * ccK = mGrepDriver->addKernelInstance<kernel::DirectCharacterClassKernelBuilder>(idb, ccNameStr, std::vector<re::CC *>{cc});
316                mGrepDriver->makeKernelCall(ccK, {ByteStream}, {ccStream});
317                externalStreamNames.push_back(ccNameStr);
318                icgrepInputSets.push_back(ccStream);
319            }
320        }
321        StreamSetBuffer * MatchResults = mGrepDriver->addBuffer<CircularBuffer>(idb, idb->getStreamSetTy(1, 1), baseBufferSize);
322        kernel::Kernel * icgrepK = mGrepDriver->addKernelInstance<kernel::ByteBitGrepKernel>(idb, prefixRE, suffixRE, externalStreamNames);
323        mGrepDriver->makeKernelCall(icgrepK, icgrepInputSets, {MatchResults});
324        MatchResultsBufs[0] = MatchResults;
325        kernel::Kernel * breakK = mGrepDriver->addKernelInstance<kernel::DirectCharacterClassKernelBuilder>(idb, "breakCC", std::vector<re::CC *>{mBreakCC});
326        mGrepDriver->makeKernelCall(breakK, {ByteStream}, {LineBreakStream});
327    } else {
328       
329        StreamSetBuffer * BasisBits = mGrepDriver->addBuffer<CircularBuffer>(idb, idb->getStreamSetTy(encodingBits, 1), baseBufferSize);
330        kernel::Kernel * s2pk = nullptr;
331        if (PabloTransposition) {
332            s2pk = mGrepDriver->addKernelInstance<kernel::S2P_PabloKernel>(idb);
333        }
334        else {
335            s2pk = mGrepDriver->addKernelInstance<kernel::S2PKernel>(idb);
336        }
337        mGrepDriver->makeKernelCall(s2pk, {ByteStream}, {BasisBits});
338
339        StreamSetBuffer * RequiredStreams = mGrepDriver->addBuffer<CircularBuffer>(idb, idb->getStreamSetTy(1, 1), baseBufferSize);
340        StreamSetBuffer * UnicodeLB = mGrepDriver->addBuffer<CircularBuffer>(idb, idb->getStreamSetTy(1, 1), baseBufferSize);
341
342        StreamSetBuffer * LineFeedStream = mGrepDriver->addBuffer<CircularBuffer>(idb, idb->getStreamSetTy(1, 1), baseBufferSize);
343        kernel::Kernel * linefeedK = mGrepDriver->addKernelInstance<kernel::LineFeedKernelBuilder>(idb, Binding{idb->getStreamSetTy(8), "basis", FixedRate(), Principal()});
344        mGrepDriver->makeKernelCall(linefeedK, {BasisBits}, {LineFeedStream});
345       
346        kernel::Kernel * requiredStreamsK = mGrepDriver->addKernelInstance<kernel::RequiredStreams_UTF8>(idb);
347        mGrepDriver->makeKernelCall(requiredStreamsK, {BasisBits, LineFeedStream}, {RequiredStreams, UnicodeLB});
348
349        if (mGrepRecordBreak == GrepRecordBreakKind::LF) {
350            LineBreakStream = LineFeedStream;
351        } else if (mGrepRecordBreak == GrepRecordBreakKind::Null) {
352            kernel::Kernel * breakK = mGrepDriver->addKernelInstance<kernel::ParabixCharacterClassKernelBuilder>(idb, "Null", std::vector<re::CC *>{mBreakCC}, 8);
353            mGrepDriver->makeKernelCall(breakK, {BasisBits}, {LineBreakStream});
354        } else {
355            LineBreakStream = UnicodeLB;
356        }
357       
358        std::map<std::string, StreamSetBuffer *> propertyStream;
359        if (PropertyKernels) {
360            for (auto p : mUnicodeProperties) {
361                auto name = p->getFullName();
362                StreamSetBuffer * s = mGrepDriver->addBuffer<CircularBuffer>(idb, idb->getStreamSetTy(1, 1), baseBufferSize);
363                propertyStream.emplace(std::make_pair(name, s));
364                kernel::Kernel * propertyK = mGrepDriver->addKernelInstance<kernel::UnicodePropertyKernelBuilder>(idb, p);
365                mGrepDriver->makeKernelCall(propertyK, {BasisBits}, {s});
366            }
367        }
368        StreamSetBuffer * GCB_stream = nullptr;
369        if (anyGCB) {
370            GCB_stream = mGrepDriver->addBuffer<CircularBuffer>(idb, idb->getStreamSetTy(1, 1), baseBufferSize);
371            kernel::Kernel * gcbK = mGrepDriver->addKernelInstance<kernel::GraphemeClusterBreakKernel>(idb);
372            mGrepDriver->makeKernelCall(gcbK, {BasisBits, RequiredStreams}, {GCB_stream});
373        }
374
375        for(unsigned i = 0; i < nREs; ++i) {
376            std::vector<std::string> externalStreamNames;
377            std::vector<StreamSetBuffer *> icgrepInputSets = {BasisBits};
378            if (mGrepRecordBreak == GrepRecordBreakKind::Unicode) {
379                externalStreamNames.push_back("UTF8_LB");
380                icgrepInputSets.push_back(LineBreakStream);
381                externalStreamNames.push_back("UTF8_nonfinal");
382                icgrepInputSets.push_back(RequiredStreams);
383            }
384            std::set<re::Name *> UnicodeProperties;
385            if (PropertyKernels) {
386                re::gatherUnicodeProperties(mREs[i], UnicodeProperties);
387                for (auto p : UnicodeProperties) {
388                    auto name = p->getFullName();
389                    auto f = propertyStream.find(name);
390                    if (f == propertyStream.end()) report_fatal_error(name + " not found\n");
391                    externalStreamNames.push_back(name);
392                    icgrepInputSets.push_back(f->second);
393                }
394            }
395            if (hasGCB[i]) {
396                externalStreamNames.push_back("\\b{g}");
397                icgrepInputSets.push_back(GCB_stream);
398            }
399            if (CC_Multiplexing) {
400                const auto UnicodeSets = re::collectCCs(mREs[i], &cc::Unicode, std::set<re::Name *>({re::makeZeroWidth("\\b{g}")}));
401                StreamSetBuffer * const MatchResults = mGrepDriver->addBuffer<CircularBuffer>(idb, idb->getStreamSetTy(1, 1), baseBufferSize);
402                if (UnicodeSets.size() <= 1) {
403                    kernel::Kernel * icgrepK = mGrepDriver->addKernelInstance<kernel::ICGrepKernel>(idb, mREs[i], externalStreamNames);
404                    mGrepDriver->makeKernelCall(icgrepK, icgrepInputSets, {MatchResults});
405                    MatchResultsBufs[i] = MatchResults;
406                } else {
407                    mpx = make_unique<MultiplexedAlphabet>("mpx", UnicodeSets);
408                    mREs[i] = transformCCs(mpx.get(), mREs[i]);
409                    std::vector<re::CC *> mpx_basis = mpx->getMultiplexedCCs();
410                    auto numOfCharacterClasses = mpx_basis.size();
411                    StreamSetBuffer * CharClasses = mGrepDriver->addBuffer<CircularBuffer>(idb, idb->getStreamSetTy(numOfCharacterClasses), baseBufferSize);
412                    kernel::Kernel * ccK = mGrepDriver->addKernelInstance<kernel::CharClassesKernel>(idb, std::move(mpx_basis));
413                    mGrepDriver->makeKernelCall(ccK, {BasisBits}, {CharClasses});
414    //                kernel::Kernel * ccK = mGrepDriver->addKernelInstance<kernel::CharClassesKernel>(idb, std::move(mpx_basis), true);
415    //                mGrepDriver->makeKernelCall(ccK, {ByteStream}, {CharClasses});
416                    kernel::Kernel * icgrepK = mGrepDriver->addKernelInstance<kernel::ICGrepKernel>(idb, mREs[i], externalStreamNames, std::vector<cc::Alphabet *>{mpx.get()});
417                    icgrepInputSets.push_back(CharClasses);
418                    mGrepDriver->makeKernelCall(icgrepK, icgrepInputSets, {MatchResults});
419                    MatchResultsBufs[i] = MatchResults;
420                }
421            } else {
422                StreamSetBuffer * MatchResults = mGrepDriver->addBuffer<CircularBuffer>(idb, idb->getStreamSetTy(1, 1), baseBufferSize);
423                kernel::Kernel * icgrepK = mGrepDriver->addKernelInstance<kernel::ICGrepKernel>(idb, mREs[i], externalStreamNames);
424                mGrepDriver->makeKernelCall(icgrepK, icgrepInputSets, {MatchResults});
425                MatchResultsBufs[i] = MatchResults;
426            }
427        }
428    }
429
430    StreamSetBuffer * MergedResults = MatchResultsBufs[0];
431    if (mREs.size() > 1) {
432        MergedResults = mGrepDriver->addBuffer<CircularBuffer>(idb, idb->getStreamSetTy(1, 1), baseBufferSize);
433        kernel::Kernel * streamsMergeK = mGrepDriver->addKernelInstance<kernel::StreamsMerge>(idb, 1, mREs.size());
434        mGrepDriver->makeKernelCall(streamsMergeK, MatchResultsBufs, {MergedResults});
435    }
436    StreamSetBuffer * Matches = MergedResults;
437    if (mMoveMatchesToEOL) {
438        StreamSetBuffer * OriginalMatches = Matches;
439        kernel::Kernel * matchedLinesK = mGrepDriver->addKernelInstance<kernel::MatchedLinesKernel>(idb);
440        Matches = mGrepDriver->addBuffer<CircularBuffer>(idb, idb->getStreamSetTy(1, 1), baseBufferSize);
441        mGrepDriver->makeKernelCall(matchedLinesK, {OriginalMatches, LineBreakStream}, {Matches});
442    }
443    if (mInvertMatches) {
444        kernel::Kernel * invertK = mGrepDriver->addKernelInstance<kernel::InvertMatchesKernel>(idb);
445        StreamSetBuffer * OriginalMatches = Matches;
446        Matches = mGrepDriver->addBuffer<CircularBuffer>(idb, idb->getStreamSetTy(1, 1), baseBufferSize);
447        mGrepDriver->makeKernelCall(invertK, {OriginalMatches, LineBreakStream}, {Matches});
448    }
449    if (mMaxCount > 0) {
450        kernel::Kernel * untilK = mGrepDriver->addKernelInstance<kernel::UntilNkernel>(idb);
451        untilK->setInitialArguments({idb->getSize(mMaxCount)});
452        StreamSetBuffer * const AllMatches = Matches;
453        Matches = mGrepDriver->addBuffer<CircularBuffer>(idb, idb->getStreamSetTy(1, 1), baseBufferSize);
454        mGrepDriver->makeKernelCall(untilK, {AllMatches}, {Matches});
455    }
456
457    return std::pair<StreamSetBuffer *, StreamSetBuffer *>(LineBreakStream, Matches);
458}
459
460// The QuietMode, MatchOnly and CountOnly engines share a common code generation main function,
461// which returns a count of the matches found (possibly subject to a MaxCount).
462//
463
464void GrepEngine::grepCodeGen() {
465    auto & idb = mGrepDriver->getBuilder();
466    Module * M = idb->getModule();
467
468    const unsigned encodingBits = 8;
469
470    Function * mainFunc = cast<Function>(M->getOrInsertFunction("Main", idb->getInt64Ty(), idb->getInt8Ty(), idb->getInt32Ty(), idb->getIntAddrTy(), nullptr));
471    mainFunc->setCallingConv(CallingConv::C);
472    idb->SetInsertPoint(BasicBlock::Create(M->getContext(), "entry", mainFunc, 0));
473    auto args = mainFunc->arg_begin();
474
475    Value * const useMMap = &*(args++);
476    useMMap->setName("useMMap");
477    Value * const fileDescriptor = &*(args++);
478    fileDescriptor->setName("fileDescriptor");
479    Value * call_back_object = &*(args++);
480    call_back_object->setName("call_back_object");
481
482    StreamSetBuffer * ByteStream = mGrepDriver->addBuffer<SourceBuffer>(idb, idb->getStreamSetTy(1, encodingBits));
483    kernel::Kernel * sourceK = mGrepDriver->addKernelInstance<kernel::FDSourceKernel>(idb);
484    sourceK->setInitialArguments({useMMap, fileDescriptor});
485    mGrepDriver->makeKernelCall(sourceK, {}, {ByteStream});
486
487    StreamSetBuffer * LineBreakStream;
488    StreamSetBuffer * Matches;
489    std::tie(LineBreakStream, Matches) = grepPipeline(ByteStream, call_back_object);
490
491    kernel::Kernel * matchCountK = mGrepDriver->addKernelInstance<kernel::PopcountKernel>(idb);
492    mGrepDriver->makeKernelCall(matchCountK, {Matches}, {});
493    mGrepDriver->generatePipelineIR();
494    idb->setKernel(matchCountK);
495    Value * matchedLineCount = idb->getAccumulator("countResult");
496    matchedLineCount = idb->CreateZExt(matchedLineCount, idb->getInt64Ty());
497    mGrepDriver->deallocateBuffers();
498    idb->CreateRet(matchedLineCount);
499   
500    mGrepDriver->finalizeObject();
501}
502
503//
504//  Default Report Match:  lines are emitted with whatever line terminators are found in the
505//  input.  However, if the final line is not terminated, a new line is appended.
506//
507void EmitMatch::accumulate_match (const size_t lineNum, char * line_start, char * line_end) {
508    mResultStr << mLinePrefix;
509    if (mShowLineNumbers) {
510        // Internally line numbers are counted from 0.  For display, adjust
511        // the line number so that lines are numbered from 1.
512        if (mInitialTab) {
513            mResultStr << lineNum+1 << "\t:";
514        }
515        else {
516            mResultStr << lineNum+1 << ":";
517        }
518    }
519    size_t bytes = line_end - line_start + 1;
520    mResultStr.write(line_start, bytes);
521    mLineCount++;
522    unsigned last_byte = *line_end;
523    mTerminated = (last_byte >= 0x0A) && (last_byte <= 0x0D);
524    if (LLVM_UNLIKELY(!mTerminated)) {
525        if (last_byte == 0x85) {  //  Possible NEL terminator.
526            mTerminated = (bytes >= 2) && (static_cast<unsigned>(line_end[-1]) == 0xC2);
527        }
528        else {
529            // Possible LS or PS terminators.
530            mTerminated = (bytes >= 3) && (static_cast<unsigned>(line_end[-2]) == 0xE2)
531                                       && (static_cast<unsigned>(line_end[-1]) == 0x80)
532                                       && ((last_byte == 0xA8) || (last_byte == 0xA9));
533        }
534    }
535}
536
537void EmitMatch::finalize_match(char * buffer_end) {
538    if (!mTerminated) mResultStr << "\n";
539}
540
541void EmitMatchesEngine::grepCodeGen() {
542    auto & idb = mGrepDriver->getBuilder();
543    Module * M = idb->getModule();
544
545    const unsigned encodingBits = 8;
546
547    Function * mainFunc = cast<Function>(M->getOrInsertFunction("Main", idb->getInt64Ty(), idb->getInt8Ty(), idb->getInt32Ty(), idb->getIntAddrTy(), nullptr));
548    mainFunc->setCallingConv(CallingConv::C);
549    idb->SetInsertPoint(BasicBlock::Create(M->getContext(), "entry", mainFunc, 0));
550    auto args = mainFunc->arg_begin();
551
552    Value * const useMMap = &*(args++);
553    useMMap->setName("useMMap");
554    Value * const fileDescriptor = &*(args++);
555    fileDescriptor->setName("fileDescriptor");
556    Value * match_accumulator = &*(args++);
557    match_accumulator->setName("match_accumulator");
558
559    StreamSetBuffer * ByteStream = mGrepDriver->addBuffer<SourceBuffer>(idb, idb->getStreamSetTy(1, encodingBits));
560    kernel::Kernel * sourceK = mGrepDriver->addKernelInstance<kernel::FDSourceKernel>(idb);
561    sourceK->setInitialArguments({useMMap, fileDescriptor});
562    mGrepDriver->makeKernelCall(sourceK, {}, {ByteStream});
563
564    StreamSetBuffer * LineBreakStream;
565    StreamSetBuffer * Matches;
566    std::tie(LineBreakStream, Matches) = grepPipeline(ByteStream, match_accumulator);
567
568    kernel::Kernel * scanMatchK = mGrepDriver->addKernelInstance<kernel::ScanMatchKernel>(idb);
569    scanMatchK->setInitialArguments({match_accumulator});
570    mGrepDriver->makeKernelCall(scanMatchK, {Matches, LineBreakStream, ByteStream}, {});
571    mGrepDriver->LinkFunction(*scanMatchK, "accumulate_match_wrapper", &accumulate_match_wrapper);
572    mGrepDriver->LinkFunction(*scanMatchK, "finalize_match_wrapper", &finalize_match_wrapper);
573
574    mGrepDriver->generatePipelineIR();
575    mGrepDriver->deallocateBuffers();
576    idb->CreateRet(idb->getInt64(0));
577    mGrepDriver->finalizeObject();
578}
579
580
581//
582//  The doGrep methods apply a GrepEngine to a single file, processing the results
583//  differently based on the engine type.
584
585uint64_t GrepEngine::doGrep(const std::string & fileName, std::ostringstream & strm) {
586    typedef uint64_t (*GrepFunctionType)(bool useMMap, int32_t fileDescriptor, intptr_t callback_addr);
587    using namespace boost::filesystem;
588    path p(fileName);
589    bool useMMap = mPreferMMap;
590    if (p == "-") useMMap = false;
591    if (!is_regular_file(p)) useMMap = false;
592
593    auto f = reinterpret_cast<GrepFunctionType>(mGrepDriver->getMain());
594
595    int32_t fileDescriptor = openFile(fileName, strm);
596    if (fileDescriptor == -1) return 0;
597    GrepCallBackObject handler;
598    uint64_t grepResult = f(useMMap, fileDescriptor, reinterpret_cast<intptr_t>(&handler));
599    close(fileDescriptor);
600    if (handler.binaryFileSignalled()) {
601        return 0;
602    }
603    else {
604        showResult(grepResult, fileName, strm);
605        return grepResult;
606    }
607}
608
609std::string GrepEngine::linePrefix(std::string fileName) {
610    if (!mShowFileNames) return "";
611    if (fileName == "-") {
612        return mStdinLabel + mFileSuffix;
613    }
614    else {
615        return fileName + mFileSuffix;
616    }
617}
618
619// Default: do not show anything
620void GrepEngine::showResult(uint64_t grepResult, const std::string & fileName, std::ostringstream & strm) {
621}
622   
623void CountOnlyEngine::showResult(uint64_t grepResult, const std::string & fileName, std::ostringstream & strm) {
624    if (mShowFileNames) strm << linePrefix(fileName);
625    strm << grepResult << "\n";
626}
627   
628void MatchOnlyEngine::showResult(uint64_t grepResult, const std::string & fileName, std::ostringstream & strm) {
629    if (grepResult == mRequiredCount) {
630       strm << linePrefix(fileName);
631    }
632}
633
634uint64_t EmitMatchesEngine::doGrep(const std::string & fileName, std::ostringstream & strm) {
635    typedef uint64_t (*GrepFunctionType)(bool useMMap, int32_t fileDescriptor, intptr_t accum_addr);
636    using namespace boost::filesystem;
637    path p(fileName);
638    bool useMMap = mPreferMMap;
639    if (p == "-") useMMap = false;
640    if (!is_regular_file(p)) useMMap = false;
641    auto f = reinterpret_cast<GrepFunctionType>(mGrepDriver->getMain());
642    int32_t fileDescriptor = openFile(fileName, strm);
643    if (fileDescriptor == -1) return 0;
644    EmitMatch accum(linePrefix(fileName), mShowLineNumbers, mInitialTab, strm);
645    f(useMMap, fileDescriptor, reinterpret_cast<intptr_t>(&accum));
646    close(fileDescriptor);
647    if (accum.binaryFileSignalled()) {
648        accum.mResultStr.clear();
649    }
650    if (accum.mLineCount > 0) grepMatchFound = true;
651    return accum.mLineCount;
652}
653
654// Open a file and return its file desciptor.
655int32_t GrepEngine::openFile(const std::string & fileName, std::ostringstream & msgstrm) {
656    if (fileName == "-") {
657        return STDIN_FILENO;
658    }
659    else {
660        struct stat sb;
661        int32_t fileDescriptor = open(fileName.c_str(), O_RDONLY);
662        if (LLVM_UNLIKELY(fileDescriptor == -1)) {
663            if (!mSuppressFileMessages) {
664                if (errno == EACCES) {
665                    msgstrm << "icgrep: " << fileName << ": Permission denied.\n";
666                }
667                else if (errno == ENOENT) {
668                    msgstrm << "icgrep: " << fileName << ": No such file.\n";
669                }
670                else {
671                    msgstrm << "icgrep: " << fileName << ": Failed.\n";
672                }
673            }
674            return fileDescriptor;
675        }
676        if (stat(fileName.c_str(), &sb) == 0 && S_ISDIR(sb.st_mode)) {
677            if (!mSuppressFileMessages) {
678                msgstrm << "icgrep: " << fileName << ": Is a directory.\n";
679            }
680            close(fileDescriptor);
681            return -1;
682        }
683        return fileDescriptor;
684    }
685}
686
687// The process of searching a group of files may use a sequential or a task
688// parallel approach.
689
690void * DoGrepThreadFunction(void *args) {
691    return reinterpret_cast<GrepEngine *>(args)->DoGrepThreadMethod();
692}
693
694bool GrepEngine::searchAllFiles() {
695    const unsigned numOfThreads = std::min(static_cast<unsigned>(Threads), static_cast<unsigned>(inputPaths.size()));
696    std::vector<pthread_t> threads(numOfThreads);
697
698    for(unsigned long i = 1; i < numOfThreads; ++i) {
699        const int rc = pthread_create(&threads[i], nullptr, DoGrepThreadFunction, (void *)this);
700        if (rc) {
701            llvm::report_fatal_error("Failed to create thread: code " + std::to_string(rc));
702        }
703    }
704    // Main thread also does the work;
705    DoGrepThreadMethod();
706    for(unsigned i = 1; i < numOfThreads; ++i) {
707        void * status = nullptr;
708        const int rc = pthread_join(threads[i], &status);
709        if (rc) {
710            llvm::report_fatal_error("Failed to join thread: code " + std::to_string(rc));
711        }
712    }
713    return grepMatchFound;
714}
715
716
717// DoGrep thread function.
718void * GrepEngine::DoGrepThreadMethod() {
719
720    unsigned fileIdx = mNextFileToGrep++;
721    while (fileIdx < inputPaths.size()) {
722        if (codegen::DebugOptionIsSet(codegen::TraceCounts)) {
723            errs() << "Tracing " << inputPaths[fileIdx].string() << "\n";
724        }
725        const auto grepResult = doGrep(inputPaths[fileIdx].string(), mResultStrs[fileIdx]);
726        mFileStatus[fileIdx] = FileStatus::GrepComplete;
727        if (grepResult > 0) {
728            grepMatchFound = true;
729        }
730        if ((mEngineKind == EngineKind::QuietMode) && grepMatchFound) {
731            if (pthread_self() != mEngineThread) {
732                pthread_exit(nullptr);
733            }
734            return nullptr;
735        }
736        fileIdx = mNextFileToGrep++;
737    }
738
739    unsigned printIdx = mNextFileToPrint++;
740    while (printIdx < inputPaths.size()) {
741        const bool readyToPrint = ((printIdx == 0) || (mFileStatus[printIdx - 1] == FileStatus::PrintComplete)) && (mFileStatus[printIdx] == FileStatus::GrepComplete);
742        if (readyToPrint) {
743            const auto output = mResultStrs[printIdx].str();
744            if (!output.empty()) {
745                llvm::outs() << output;
746            }
747            mFileStatus[printIdx] = FileStatus::PrintComplete;
748            printIdx = mNextFileToPrint++;
749        } else {
750            mGrepDriver->performIncrementalCacheCleanupStep();
751        }
752        sched_yield();
753    }
754
755    if (pthread_self() != mEngineThread) {
756        pthread_exit(nullptr);
757    } else {
758        // Always perform one final cache cleanup step.
759        mGrepDriver->performIncrementalCacheCleanupStep();
760        if (mGrepStdIn) {
761            std::ostringstream s;
762            const auto grepResult = doGrep("-", s);
763            llvm::outs() << s.str();
764            if (grepResult) grepMatchFound = true;
765        }
766    }
767    return nullptr;
768}
769
770   
771   
772InternalSearchEngine::InternalSearchEngine() :
773    mGrepRecordBreak(GrepRecordBreakKind::LF),
774    mCaseInsensitive(false),
775    mGrepDriver(make_unique<ParabixDriver>("InternalEngine")) {}
776   
777void InternalSearchEngine::grepCodeGen(re::RE * matchingRE, re::RE * excludedRE, MatchAccumulator * accum) {
778    auto & idb = mGrepDriver->getBuilder();
779    Module * M = idb->getModule();
780   
781    mSaveSegmentPipelineParallel = codegen::SegmentPipelineParallel;
782    codegen::SegmentPipelineParallel = false;
783    const unsigned segmentSize = codegen::BufferSegments * codegen::SegmentSize * codegen::ThreadNum;
784   
785    re::CC * breakCC = nullptr;
786    if (mGrepRecordBreak == GrepRecordBreakKind::Null) {
787        breakCC = re::makeByte(0x0);
788    } else {// if (mGrepRecordBreak == GrepRecordBreakKind::LF)
789        breakCC = re::makeByte(0x0A);
790    }
791    bool excludeNothing = (excludedRE == nullptr) || (isa<re::Alt>(excludedRE) && cast<re::Alt>(excludedRE)->empty());
792    bool matchAllLines = (matchingRE == nullptr) || isa<re::End>(matchingRE);
793    if (!matchAllLines) {
794        matchingRE = resolveCaseInsensitiveMode(matchingRE, mCaseInsensitive);
795        matchingRE = regular_expression_passes(matchingRE);
796        matchingRE = re::exclude_CC(matchingRE, breakCC);
797        matchingRE = resolveAnchors(matchingRE, breakCC);
798        matchingRE = toUTF8(matchingRE);
799    }
800    if (!excludeNothing) {
801        excludedRE = resolveCaseInsensitiveMode(excludedRE, mCaseInsensitive);
802        excludedRE = regular_expression_passes(excludedRE);
803        excludedRE = re::exclude_CC(excludedRE, breakCC);
804        excludedRE = resolveAnchors(excludedRE, breakCC);
805        excludedRE = toUTF8(excludedRE);
806    }
807    Function * mainFunc = cast<Function>(M->getOrInsertFunction("Main", idb->getVoidTy(), idb->getInt8PtrTy(), idb->getSizeTy(), nullptr));
808    mainFunc->setCallingConv(CallingConv::C);
809    auto args = mainFunc->arg_begin();
810    Value * const buffer = &*(args++);
811    buffer->setName("buffer");
812    Value * length = &*(args++);
813    length->setName("length");
814   
815    idb->SetInsertPoint(BasicBlock::Create(M->getContext(), "entry", mainFunc, 0));
816    StreamSetBuffer * ByteStream = mGrepDriver->addBuffer<SourceBuffer>(idb, idb->getStreamSetTy(1, 8));
817    kernel::Kernel * sourceK = mGrepDriver->addKernelInstance<kernel::MemorySourceKernel>(idb, idb->getInt8PtrTy());
818    sourceK->setInitialArguments({buffer, length});
819    mGrepDriver->makeKernelCall(sourceK, {}, {ByteStream});
820    StreamSetBuffer * RecordBreakStream = mGrepDriver->addBuffer<CircularBuffer>(idb, idb->getStreamSetTy(1, 1), segmentSize);
821    std::string RBname = (mGrepRecordBreak == GrepRecordBreakKind::Null) ? "Null" : "LF";
822
823   
824    StreamSetBuffer * BasisBits = nullptr;
825   
826    if (matchAllLines && excludeNothing) {
827        kernel::Kernel * breakK = mGrepDriver->addKernelInstance<kernel::DirectCharacterClassKernelBuilder>(idb, RBname, std::vector<re::CC *>{breakCC});
828        mGrepDriver->makeKernelCall(breakK, {ByteStream}, {RecordBreakStream});
829    } else {
830        BasisBits = mGrepDriver->addBuffer<CircularBuffer>(idb, idb->getStreamSetTy(8, 1), segmentSize);
831        kernel::Kernel * s2pk = mGrepDriver->addKernelInstance<kernel::S2PKernel>(idb);
832        mGrepDriver->makeKernelCall(s2pk, {ByteStream}, {BasisBits});
833       
834        kernel::Kernel * breakK = mGrepDriver->addKernelInstance<kernel::ParabixCharacterClassKernelBuilder>(idb, RBname, std::vector<re::CC *>{breakCC}, 8);
835        mGrepDriver->makeKernelCall(breakK, {BasisBits}, {RecordBreakStream});
836    }
837   
838    std::vector<std::string> externalStreamNames;
839    StreamSetBuffer * MatchingRecords = nullptr;
840    if (matchAllLines) {
841        MatchingRecords = RecordBreakStream;
842    } else {
843        StreamSetBuffer * MatchResults = mGrepDriver->addBuffer<CircularBuffer>(idb, idb->getStreamSetTy(1, 1), segmentSize);
844        kernel::Kernel * includeK = mGrepDriver->addKernelInstance<kernel::ICGrepKernel>(idb, matchingRE, externalStreamNames);
845        mGrepDriver->makeKernelCall(includeK, {BasisBits}, {MatchResults});
846        MatchingRecords = mGrepDriver->addBuffer<CircularBuffer>(idb, idb->getStreamSetTy(1, 1), segmentSize);
847        kernel::Kernel * matchedLinesK = mGrepDriver->addKernelInstance<kernel::MatchedLinesKernel>(idb);
848        mGrepDriver->makeKernelCall(matchedLinesK, {MatchResults, RecordBreakStream}, {MatchingRecords});
849    }
850    if (!excludeNothing) {
851        StreamSetBuffer * ExcludedResults = mGrepDriver->addBuffer<CircularBuffer>(idb, idb->getStreamSetTy(1, 1), segmentSize);
852        kernel::Kernel * excludeK = mGrepDriver->addKernelInstance<kernel::ICGrepKernel>(idb, excludedRE, externalStreamNames);
853        mGrepDriver->makeKernelCall(excludeK, {BasisBits}, {ExcludedResults});
854        StreamSetBuffer * ExcludedRecords = mGrepDriver->addBuffer<CircularBuffer>(idb, idb->getStreamSetTy(1, 1), segmentSize);
855        kernel::Kernel * matchedLinesK = mGrepDriver->addKernelInstance<kernel::MatchedLinesKernel>(idb);
856        mGrepDriver->makeKernelCall(matchedLinesK, {ExcludedResults, RecordBreakStream}, {ExcludedRecords});
857
858        kernel::Kernel * invertK = mGrepDriver->addKernelInstance<kernel::InvertMatchesKernel>(idb);
859        if (!matchAllLines) {
860            StreamSetBuffer * nonExcluded = mGrepDriver->addBuffer<CircularBuffer>(idb, idb->getStreamSetTy(1, 1), segmentSize);
861            mGrepDriver->makeKernelCall(invertK, {ExcludedRecords, RecordBreakStream}, {nonExcluded});
862            StreamSetBuffer * included = MatchingRecords;
863            MatchingRecords = mGrepDriver->addBuffer<CircularBuffer>(idb, idb->getStreamSetTy(1, 1), segmentSize);
864            kernel::Kernel * streamsIntersectK = mGrepDriver->addKernelInstance<kernel::StreamsIntersect>(idb, 1, 2);
865            mGrepDriver->makeKernelCall(streamsIntersectK, {included, nonExcluded}, {MatchingRecords});
866        }
867        else {
868            MatchingRecords = mGrepDriver->addBuffer<CircularBuffer>(idb, idb->getStreamSetTy(1, 1), segmentSize);
869            mGrepDriver->makeKernelCall(invertK, {ExcludedRecords, RecordBreakStream}, {MatchingRecords});
870        }
871    }
872    kernel::Kernel * scanMatchK = mGrepDriver->addKernelInstance<kernel::ScanMatchKernel>(idb);
873    scanMatchK->setInitialArguments({ConstantInt::get(idb->getIntAddrTy(), reinterpret_cast<intptr_t>(accum))});
874    mGrepDriver->makeKernelCall(scanMatchK, {MatchingRecords, RecordBreakStream, ByteStream}, {});
875    mGrepDriver->LinkFunction(*scanMatchK, "accumulate_match_wrapper", &accumulate_match_wrapper);
876    mGrepDriver->LinkFunction(*scanMatchK, "finalize_match_wrapper", &finalize_match_wrapper);
877    mGrepDriver->generatePipelineIR();
878    mGrepDriver->deallocateBuffers();
879    idb->CreateRetVoid();
880    mGrepDriver->finalizeObject();
881}
882
883void InternalSearchEngine::doGrep(const char * search_buffer, size_t bufferLength) {
884    typedef void (*GrepFunctionType)(const char * buffer, const size_t length);
885    auto f = reinterpret_cast<GrepFunctionType>(mGrepDriver->getMain());
886    f(search_buffer, bufferLength);
887    codegen::SegmentPipelineParallel = mSaveSegmentPipelineParallel;
888}
889
890GrepEngine::~GrepEngine() { }
891
892InternalSearchEngine::~InternalSearchEngine() { }
893
894}
Note: See TracBrowser for help on using the repository browser.