source: icGREP/icgrep-devel/icgrep/grep/grep_engine.cpp @ 6047

Last change on this file since 6047 was 6047, checked in by nmedfort, 12 months ago

Major refactoring of buffer types. Static buffers replace Circular and CircularCopyback. External buffers unify Source/External.

File size: 40.1 KB
Line 
1/*
2 *  Copyright (c) 2018 International Characters.
3 *  This software is licensed to the public under the Open Software License 3.0.
4 *  icgrep is a trademark of International Characters.
5 */
6#include <set>
7#include "grep_engine.h"
8#include <llvm/IR/Module.h>
9#include <boost/filesystem.hpp>
10#include <UCD/resolve_properties.h>
11#include <kernels/charclasses.h>
12#include <kernels/cc_kernel.h>
13#include <kernels/grep_kernel.h>
14#include <kernels/UCD_property_kernel.h>
15#include <kernels/grapheme_kernel.h>
16#include <kernels/linebreak_kernel.h>
17#include <kernels/streams_merge.h>
18#include <kernels/source_kernel.h>
19#include <kernels/s2p_kernel.h>
20#include <kernels/scanmatchgen.h>
21#include <kernels/streamset.h>
22#include <kernels/until_n.h>
23#include <kernels/kernel_builder.h>
24#include <pablo/pablo_kernel.h>
25#include <cc/alphabet.h>
26#include <re/re_cc.h>
27#include <re/re_alt.h>
28#include <re/re_end.h>
29#include <re/re_name.h>
30#include <re/casing.h>
31#include <re/exclude_CC.h>
32#include <re/to_utf8.h>
33#include <re/re_toolchain.h>
34#include <toolchain/toolchain.h>
35#include <re/re_analysis.h>
36#include <re/re_name_resolve.h>
37#include <re/re_name_gather.h>
38#include <re/collect_ccs.h>
39#include <re/replaceCC.h>
40#include <re/re_multiplex.h>
41#include <re/grapheme_clusters.h>
42#include <re/re_utility.h>
43#include <re/printer_re.h>
44#include <toolchain/toolchain.h>
45#include <toolchain/cpudriver.h>
46#include <iostream>
47#include <cc/multiplex_CCs.h>
48#include <llvm/Support/raw_ostream.h>
49#include <util/file_select.h>
50#include <util/aligned_allocator.h>
51#include <sys/stat.h>
52#include <fcntl.h>
53#include <errno.h>
54#include <llvm/ADT/STLExtras.h> // for make_unique
55#include <llvm/Support/CommandLine.h>
56#include <llvm/Support/Debug.h>
57#include <llvm/Support/Casting.h>
58#include <sched.h>
59
60using namespace parabix;
61using namespace llvm;
62using namespace cc;
63using namespace kernel;
64
// Command-line options local to this translation unit.

// -t: upper bound on the number of worker threads; searchAllFiles() uses the
// smaller of this value and the number of input paths.
65static cl::opt<int> Threads("t", cl::desc("Total number of threads."), cl::init(2));
// When set, grepPipeline() transposes with S2P_PabloKernel instead of S2PKernel.
66static cl::opt<bool> PabloTransposition("enable-pablo-s2p", cl::desc("Enable experimental pablo transposition."));
// When set, grepPipeline() collects the Unicode CCs of each RE and, if there is
// more than one, routes them through a MultiplexedAlphabet / CharClassesKernel.
67static cl::opt<bool> CC_Multiplexing("CC-multiplexing", cl::desc("Enable CC multiplexing."), cl::init(false));
// When set, grepPipeline() adds one UnicodePropertyKernelBuilder per gathered
// Unicode property and feeds the resulting streams to the grep kernels.
68static cl::opt<bool> PropertyKernels("enable-property-kernels", cl::desc("Enable Unicode property kernels."), cl::init(false));
// When set, the byte-oriented paths of grepPipeline() add a separate
// DirectCharacterClassKernelBuilder kernel for each CC of the prefix RE.
69static cl::opt<bool> MultithreadedSimpleRE("enable-simple-RE-kernels", cl::desc("Enable individual CC kernels for simple REs."), cl::init(false));
// Default value for the byte-CC-limit option below.
70const unsigned DefaultByteCClimit = 6;
71
// Limit passed to byteTestsWithinLimit() / hasTriCCwithinLimit() when
// grepPipeline() decides whether a single RE can use a byte-oriented path.
72static cl::opt<unsigned> ByteCClimit("byte-CC-limit", cl::desc("Max number of CCs for byte CC pipeline."), cl::init(DefaultByteCClimit));
73
74
75namespace grep {
76   
77extern "C" void signal_dispatcher(intptr_t callback_object_addr, unsigned signal) {
78    reinterpret_cast<GrepCallBackObject *>(callback_object_addr)->handle_signal(signal);
79}
80   
81void GrepCallBackObject::handle_signal(unsigned s) {
82    if (static_cast<GrepSignal>(s) == GrepSignal::BinaryFile) {
83        mBinaryFile = true;
84    } else {
85        llvm::report_fatal_error("Unknown GrepSignal");
86    }
87}
88
89extern "C" void accumulate_match_wrapper(intptr_t accum_addr, const size_t lineNum, char * line_start, char * line_end) {
90    reinterpret_cast<MatchAccumulator *>(accum_addr)->accumulate_match(lineNum, line_start, line_end);
91}
92
93extern "C" void finalize_match_wrapper(intptr_t accum_addr, char * buffer_end) {
94    reinterpret_cast<MatchAccumulator *>(accum_addr)->finalize_match(buffer_end);
95}
96   
97
// Returns ceil(log2(v)): the smallest exponent e such that (1 << e) >= v.
//
// v must be non-zero (checked by assert in debug builds).  v == 1 yields 0;
// it is handled explicitly because the count-leading-zeros builtin is
// undefined for a zero argument, which is what (v - 1) would be.
inline static size_t ceil_log2(const size_t v) {
    static_assert(sizeof(size_t) <= sizeof(unsigned long long),
                  "size_t must fit in unsigned long long");
    assert ("log2(0) is undefined!" && v != 0);
    if (v <= 1) {
        return 0;
    }
    // Widen to unsigned long long so the result does not depend on
    // sizeof(long) matching sizeof(size_t).
    const unsigned long long predecessor = static_cast<unsigned long long>(v) - 1ULL;
    const size_t totalBits = sizeof(unsigned long long) * CHAR_BIT;
    return totalBits - static_cast<size_t>(__builtin_clzll(predecessor));
}
103
104void SearchableBuffer::addSearchCandidate(const char * C_string_ptr) {
105    size_t length = strlen(C_string_ptr)+1;
106    if (mSpace_used + length >= mAllocated_capacity) {
107        size_t new_capacity = size_t{1} << (ceil_log2(mSpace_used + length + 1));
108        AlignedAllocator<char, BUFFER_ALIGNMENT> alloc;
109        char * new_buffer = mAllocator.allocate(new_capacity, 0);
110        memcpy(new_buffer, mBuffer_base, mSpace_used);
111        memset(&new_buffer[mSpace_used], 0, new_capacity-mSpace_used);
112        if (mBuffer_base != mInitial_buffer) {
113            alloc.deallocate(mBuffer_base, 0);
114        }
115        mBuffer_base = new_buffer;
116        mAllocated_capacity = new_capacity;
117    }
118    memcpy((void * ) &mBuffer_base[mSpace_used], C_string_ptr, length);
119    mSpace_used += length;
120    assert("Search candidate not null terminated" && (mBuffer_base[mSpace_used] == '\0'));
121    mEntries++;
122}
123
124SearchableBuffer::SearchableBuffer() :
125    mAllocated_capacity(INITIAL_CAPACITY),
126    mSpace_used(0),
127    mEntries(0),
128    mBuffer_base(mInitial_buffer) {
129    memset(mBuffer_base, 0, INITIAL_CAPACITY);
130}
131
132SearchableBuffer::~SearchableBuffer() {
133    if (mBuffer_base != mInitial_buffer) {
134        mAllocator.deallocate(mBuffer_base, 0);
135    }
136}
137
138
139
140// Grep Engine construction and initialization.
141
142GrepEngine::GrepEngine() :
143    mSuppressFileMessages(false),
144    mBinaryFilesMode(argv::Text),
145    mPreferMMap(true),
146    mShowFileNames(false),
147    mStdinLabel("(stdin)"),
148    mShowLineNumbers(false),
149    mInitialTab(false),
150    mCaseInsensitive(false),
151    mInvertMatches(false),
152    mMaxCount(0),
153    mGrepStdIn(false),
154    mGrepDriver(make_unique<ParabixDriver>("engine")),
155    mNextFileToGrep(0),
156    mNextFileToPrint(0),
157    grepMatchFound(false),
158    mGrepRecordBreak(GrepRecordBreakKind::LF),
159    mMoveMatchesToEOL(true),
160    mEngineThread(pthread_self()) {}
161
162QuietModeEngine::QuietModeEngine() : GrepEngine() {
163    mEngineKind = EngineKind::QuietMode;
164    mMoveMatchesToEOL = false;
165    mMaxCount = 1;
166}
167
168MatchOnlyEngine::MatchOnlyEngine(bool showFilesWithMatch, bool useNullSeparators) :
169    GrepEngine(), mRequiredCount(showFilesWithMatch) {
170    mEngineKind = EngineKind::MatchOnly;
171    mFileSuffix = useNullSeparators ? std::string("\0", 1) : "\n";
172    mMoveMatchesToEOL = false;
173    mMaxCount = 1;
174    mShowFileNames = true;
175}
176
177CountOnlyEngine::CountOnlyEngine() : GrepEngine() {
178    mEngineKind = EngineKind::CountOnly;
179    mFileSuffix = ":";
180}
181
182EmitMatchesEngine::EmitMatchesEngine() : GrepEngine() {
183    mEngineKind = EngineKind::EmitMatches;
184    mFileSuffix = mInitialTab ? "\t:" : ":";
185}
186
187   
188void GrepEngine::setRecordBreak(GrepRecordBreakKind b) {
189    mGrepRecordBreak = b;
190}
191
192   
193
194   
195void GrepEngine::initFileResult(std::vector<boost::filesystem::path> & paths) {
196    const unsigned n = paths.size();
197    mResultStrs.resize(n);
198    mFileStatus.resize(n, FileStatus::Pending);
199    inputPaths = paths;
200}
201
202void GrepEngine::initREs(std::vector<re::RE *> & REs) {
203    if (mGrepRecordBreak == GrepRecordBreakKind::Unicode) {
204        mBreakCC = re::makeCC(re::makeCC(0x0A, 0x0D), re::makeCC(re::makeCC(0x85), re::makeCC(0x2028, 0x2029)));
205    } else if (mGrepRecordBreak == GrepRecordBreakKind::Null) {
206        mBreakCC = re::makeByte(0);  // Null
207    } else {
208        mBreakCC = re::makeByte(0x0A); // LF
209    }
210    re::RE * anchorRE = mBreakCC;
211    if (mGrepRecordBreak == GrepRecordBreakKind::Unicode) {
212        re::Name * anchorName = re::makeName("UTF8_LB", re::Name::Type::Unicode);
213        anchorName->setDefinition(re::makeUnicodeBreak());
214        anchorRE = anchorName;
215    }
216   
217    mREs = REs;
218    bool allAnchored = true;
219    for(unsigned i = 0; i < mREs.size(); ++i) {
220        if (!hasEndAnchor(mREs[i])) allAnchored = false;
221        mREs[i] = resolveModesAndExternalSymbols(mREs[i], mCaseInsensitive);
222        mREs[i] = re::exclude_CC(mREs[i], mBreakCC);
223        mREs[i] = resolveAnchors(mREs[i], anchorRE);
224        re::gatherUnicodeProperties(mREs[i], mUnicodeProperties);
225        mREs[i] = regular_expression_passes(mREs[i]);
226    }
227    if (allAnchored && (mGrepRecordBreak != GrepRecordBreakKind::Unicode)) mMoveMatchesToEOL = false;
228
229}
230
231
232   
233// Code Generation
234//
235// All engines share a common pipeline to compute a stream of Matches from a given input Bytestream.
236
237unsigned LLVM_READNONE calculateMaxCountRate(const std::unique_ptr<kernel::KernelBuilder> & b) {
238    const unsigned packSize = b->getSizeTy()->getBitWidth();
239    return (packSize * packSize) / b->getBitBlockWidth();
240}
241   
// Builds the match-computation part of the pipeline shared by all engines.
//
// SourceStream          - the byte stream buffer produced by the source kernel.
// callback_object_addr  - value handed to the AbortOnNull kernel as its initial
//                         argument (only used in the WithoutMatch binary mode).
//
// Returns the pair (LineBreakStream, Matches).
//
// Kernels and buffers are registered with mGrepDriver in the order the
// statements below execute, so statement order is significant.
242std::pair<StreamSetBuffer *, StreamSetBuffer *> GrepEngine::grepPipeline(StreamSetBuffer * SourceStream, Value * callback_object_addr) {
243    auto & idb = mGrepDriver->getBuilder();
244    const unsigned segmentSize = codegen::SegmentSize;
245    const unsigned bufferSegments = codegen::BufferSegments * codegen::ThreadNum;
246    // TODO: until we automate stream buffer sizing, use this calculation to determine how large our matches buffer needs to be.
247    const unsigned baseBufferSize = segmentSize * (mMaxCount > 0 ? (std::max(bufferSegments, calculateMaxCountRate(idb))) : bufferSegments);
248    const unsigned encodingBits = 8;
249   
250   
251    //  Regular Expression Processing and Analysis Phase
252    const auto nREs = mREs.size();
    // NOTE(review): an array whose bound is a run-time value is a compiler
    // extension rather than standard C++.
253    bool hasGCB[nREs];
254    bool anyGCB = false;
255
    // Record which REs contain a grapheme cluster boundary.
256    for(unsigned i = 0; i < nREs; ++i) {
257        hasGCB[i] = hasGraphemeClusterBoundary(mREs[i]);
258        anyGCB |= hasGCB[i];
259    }
    // Choose the byte stream the matchers read: the source itself in Text mode,
    // or the output of an AbortOnNull kernel (which is linked to
    // signal_dispatcher) in WithoutMatch mode.  Other modes are rejected.
260    StreamSetBuffer * ByteStream = nullptr;
261    if (mBinaryFilesMode == argv::Text) {
262        ByteStream = SourceStream;
263    } else if (mBinaryFilesMode == argv::WithoutMatch) {
264        ByteStream = mGrepDriver->addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(1, 8), baseBufferSize);
265        kernel::Kernel * binaryCheckK = mGrepDriver->addKernelInstance<kernel::AbortOnNull>(idb);
266        binaryCheckK->setInitialArguments({callback_object_addr});
267        mGrepDriver->makeKernelCall(binaryCheckK, {SourceStream}, {ByteStream});
268        mGrepDriver->LinkFunction(*binaryCheckK, "signal_dispatcher", &signal_dispatcher);
269    } else {
270        llvm::report_fatal_error("Binary mode not supported.");
271    }
272    StreamSetBuffer * LineBreakStream = mGrepDriver->addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(1, 1), baseBufferSize);
    // One match-result buffer per RE; filled in by whichever path runs below.
273    std::vector<StreamSetBuffer *> MatchResultsBufs(nREs);
274   
    // Outputs of hasTriCCwithinLimit(); only read on paths where that call has
    // been made.
275    re::RE * prefixRE;
276    re::RE * suffixRE;
277    // For simple regular expressions with a small number of characters, we
278    // can bypass transposition and use the Direct CC compiler.
279    bool isSimple = (nREs == 1) && (mGrepRecordBreak != GrepRecordBreakKind::Unicode) && (!anyGCB);
280    if (isSimple) {
281        mREs[0] = toUTF8(mREs[0]);
282    }
    // Path 1: single simple RE within the byte-test limit -> ByteGrepKernel
    // reading ByteStream directly.
283    if (isSimple && byteTestsWithinLimit(mREs[0], ByteCClimit)) {
284        std::vector<std::string> externalStreamNames;
285        std::vector<StreamSetBuffer *> icgrepInputSets = {ByteStream};
        // Optionally compute each CC of the prefix RE in its own kernel and
        // pass the results to the grep kernel as named external streams.
286        if (MultithreadedSimpleRE && hasTriCCwithinLimit(mREs[0], ByteCClimit, prefixRE, suffixRE)) {
287            auto CCs = re::collectCCs(prefixRE, &cc::Byte);
288            for (auto cc : CCs) {
289                auto ccName = makeName(cc);
290                mREs[0] = re::replaceCC(mREs[0], cc, ccName);
291                std::string ccNameStr = ccName->getFullName();
292                StreamSetBuffer * ccStream = mGrepDriver->addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(1, 1), baseBufferSize);
293                kernel::Kernel * ccK = mGrepDriver->addKernelInstance<kernel::DirectCharacterClassKernelBuilder>(idb, ccNameStr, std::vector<re::CC *>{cc});
294                mGrepDriver->makeKernelCall(ccK, {ByteStream}, {ccStream});
295                externalStreamNames.push_back(ccNameStr);
296                icgrepInputSets.push_back(ccStream);
297            }
298        }
299        StreamSetBuffer * MatchResults = mGrepDriver->addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(1, 1), baseBufferSize);
300        kernel::Kernel * icgrepK = mGrepDriver->addKernelInstance<kernel::ByteGrepKernel>(idb, mREs[0], externalStreamNames);
301        mGrepDriver->makeKernelCall(icgrepK, icgrepInputSets, {MatchResults});
302        MatchResultsBufs[0] = MatchResults;
        // Line breaks are computed from the byte stream with the break CC.
303        kernel::Kernel * breakK = mGrepDriver->addKernelInstance<kernel::DirectCharacterClassKernelBuilder>(idb, "breakCC", std::vector<re::CC *>{mBreakCC});
304        mGrepDriver->makeKernelCall(breakK, {ByteStream}, {LineBreakStream});
    // Path 2: single simple RE accepted by hasTriCCwithinLimit() ->
    // ByteBitGrepKernel built from the prefix/suffix REs it returns.
305    } else if (isSimple && hasTriCCwithinLimit(mREs[0], ByteCClimit, prefixRE, suffixRE)) {
306        std::vector<std::string> externalStreamNames;
307        std::vector<StreamSetBuffer *> icgrepInputSets = {ByteStream};
308        if (MultithreadedSimpleRE) {
309            auto CCs = re::collectCCs(prefixRE, &cc::Byte);
310            for (auto cc : CCs) {
311                auto ccName = makeName(cc);
312                mREs[0] = re::replaceCC(mREs[0], cc, ccName);
313                std::string ccNameStr = ccName->getFullName();
314                StreamSetBuffer * ccStream = mGrepDriver->addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(1, 1), baseBufferSize);
315                kernel::Kernel * ccK = mGrepDriver->addKernelInstance<kernel::DirectCharacterClassKernelBuilder>(idb, ccNameStr, std::vector<re::CC *>{cc});
316                mGrepDriver->makeKernelCall(ccK, {ByteStream}, {ccStream});
317                externalStreamNames.push_back(ccNameStr);
318                icgrepInputSets.push_back(ccStream);
319            }
320        }
321        StreamSetBuffer * MatchResults = mGrepDriver->addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(1, 1), baseBufferSize);
322        kernel::Kernel * icgrepK = mGrepDriver->addKernelInstance<kernel::ByteBitGrepKernel>(idb, prefixRE, suffixRE, externalStreamNames);
323        mGrepDriver->makeKernelCall(icgrepK, icgrepInputSets, {MatchResults});
324        MatchResultsBufs[0] = MatchResults;
325        kernel::Kernel * breakK = mGrepDriver->addKernelInstance<kernel::DirectCharacterClassKernelBuilder>(idb, "breakCC", std::vector<re::CC *>{mBreakCC});
326        mGrepDriver->makeKernelCall(breakK, {ByteStream}, {LineBreakStream});
    // Path 3 (general): transpose the bytes into basis bit streams and run one
    // ICGrepKernel per RE.
327    } else {
328       
329        StreamSetBuffer * BasisBits = mGrepDriver->addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(encodingBits, 1), baseBufferSize);
330        kernel::Kernel * s2pk = nullptr;
331        if (PabloTransposition) {
332            s2pk = mGrepDriver->addKernelInstance<kernel::S2P_PabloKernel>(idb);
333        }
334        else {
335            s2pk = mGrepDriver->addKernelInstance<kernel::S2PKernel>(idb);
336        }
337        mGrepDriver->makeKernelCall(s2pk, {ByteStream}, {BasisBits});
338
339        StreamSetBuffer * RequiredStreams = mGrepDriver->addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(1, 1), baseBufferSize);
340        StreamSetBuffer * UnicodeLB = mGrepDriver->addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(1, 1), baseBufferSize);
341
342        StreamSetBuffer * LineFeedStream = mGrepDriver->addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(1, 1), baseBufferSize);
343        kernel::Kernel * linefeedK = mGrepDriver->addKernelInstance<kernel::LineFeedKernelBuilder>(idb, Binding{idb->getStreamSetTy(8), "basis", FixedRate(), Principal()});
344        mGrepDriver->makeKernelCall(linefeedK, {BasisBits}, {LineFeedStream});
345       
        // RequiredStreams_UTF8 produces both RequiredStreams and UnicodeLB.
346        kernel::Kernel * requiredStreamsK = mGrepDriver->addKernelInstance<kernel::RequiredStreams_UTF8>(idb);
347        mGrepDriver->makeKernelCall(requiredStreamsK, {BasisBits, LineFeedStream}, {RequiredStreams, UnicodeLB});
348
        // Pick the line-break stream for the configured record break: the
        // line-feed stream, a dedicated "Null" CC kernel, or UnicodeLB.
349        if (mGrepRecordBreak == GrepRecordBreakKind::LF) {
350            LineBreakStream = LineFeedStream;
351        } else if (mGrepRecordBreak == GrepRecordBreakKind::Null) {
352            kernel::Kernel * breakK = mGrepDriver->addKernelInstance<kernel::ParabixCharacterClassKernelBuilder>(idb, "Null", std::vector<re::CC *>{mBreakCC}, 8);
353            mGrepDriver->makeKernelCall(breakK, {BasisBits}, {LineBreakStream});
354        } else {
355            LineBreakStream = UnicodeLB;
356        }
357       
        // Optional: one kernel and one stream per gathered Unicode property,
        // looked up by full name further down.
358        std::map<std::string, StreamSetBuffer *> propertyStream;
359        if (PropertyKernels) {
360            for (auto p : mUnicodeProperties) {
361                auto name = p->getFullName();
362                StreamSetBuffer * s = mGrepDriver->addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(1, 1), baseBufferSize);
363                propertyStream.emplace(std::make_pair(name, s));
364                kernel::Kernel * propertyK = mGrepDriver->addKernelInstance<kernel::UnicodePropertyKernelBuilder>(idb, p);
365                mGrepDriver->makeKernelCall(propertyK, {BasisBits}, {s});
366            }
367        }
        // A grapheme cluster break stream is only built when some RE needs it.
368        StreamSetBuffer * GCB_stream = nullptr;
369        if (anyGCB) {
370            GCB_stream = mGrepDriver->addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(1, 1), baseBufferSize);
371            kernel::Kernel * gcbK = mGrepDriver->addKernelInstance<kernel::GraphemeClusterBreakKernel>(idb);
372            mGrepDriver->makeKernelCall(gcbK, {BasisBits, RequiredStreams}, {GCB_stream});
373        }
374
        // Per RE: assemble the external stream names and the matching input
        // buffers (kept in the same order), then add the grep kernel.
375        for(unsigned i = 0; i < nREs; ++i) {
376            std::vector<std::string> externalStreamNames;
377            std::vector<StreamSetBuffer *> icgrepInputSets = {BasisBits};
378            if (mGrepRecordBreak == GrepRecordBreakKind::Unicode) {
379                externalStreamNames.push_back("UTF8_LB");
380                icgrepInputSets.push_back(LineBreakStream);
381                externalStreamNames.push_back("UTF8_nonfinal");
382                icgrepInputSets.push_back(RequiredStreams);
383            }
384            std::set<re::Name *> UnicodeProperties;
385            if (PropertyKernels) {
386                re::gatherUnicodeProperties(mREs[i], UnicodeProperties);
387                for (auto p : UnicodeProperties) {
388                    auto name = p->getFullName();
389                    auto f = propertyStream.find(name);
390                    if (f == propertyStream.end()) report_fatal_error(name + " not found\n");
391                    externalStreamNames.push_back(name);
392                    icgrepInputSets.push_back(f->second);
393                }
394            }
395            if (hasGCB[i]) {
396                externalStreamNames.push_back("\\b{g}");
397                icgrepInputSets.push_back(GCB_stream);
398            }
399            if (CC_Multiplexing) {
400                const auto UnicodeSets = re::collectCCs(mREs[i], &cc::Unicode, std::set<re::Name *>({re::makeZeroWidth("\\b{g}")}));
401                StreamSetBuffer * const MatchResults = mGrepDriver->addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(1, 1), baseBufferSize);
                // With at most one Unicode set the RE is compiled as is.
402                if (UnicodeSets.size() <= 1) {
403                    kernel::Kernel * icgrepK = mGrepDriver->addKernelInstance<kernel::ICGrepKernel>(idb, mREs[i], externalStreamNames);
404                    mGrepDriver->makeKernelCall(icgrepK, icgrepInputSets, {MatchResults});
405                    MatchResultsBufs[i] = MatchResults;
406                } else {
                    // Otherwise rewrite the RE over a multiplexed alphabet and
                    // add a CharClassesKernel that produces its basis streams.
                    // NOTE(review): mpx is a single member reassigned on each
                    // iteration that reaches this branch.
407                    mpx = make_unique<MultiplexedAlphabet>("mpx", UnicodeSets);
408                    mREs[i] = transformCCs(mpx.get(), mREs[i]);
409                    std::vector<re::CC *> mpx_basis = mpx->getMultiplexedCCs();
410                    auto numOfCharacterClasses = mpx_basis.size();
411                    StreamSetBuffer * CharClasses = mGrepDriver->addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(numOfCharacterClasses), baseBufferSize);
412                    kernel::Kernel * ccK = mGrepDriver->addKernelInstance<kernel::CharClassesKernel>(idb, std::move(mpx_basis));
413                    mGrepDriver->makeKernelCall(ccK, {BasisBits}, {CharClasses});
414    //                kernel::Kernel * ccK = mGrepDriver->addKernelInstance<kernel::CharClassesKernel>(idb, std::move(mpx_basis), true);
415    //                mGrepDriver->makeKernelCall(ccK, {ByteStream}, {CharClasses});
416                    kernel::Kernel * icgrepK = mGrepDriver->addKernelInstance<kernel::ICGrepKernel>(idb, mREs[i], externalStreamNames, std::vector<cc::Alphabet *>{mpx.get()});
417                    icgrepInputSets.push_back(CharClasses);
418                    mGrepDriver->makeKernelCall(icgrepK, icgrepInputSets, {MatchResults});
419                    MatchResultsBufs[i] = MatchResults;
420                }
421            } else {
422                StreamSetBuffer * MatchResults = mGrepDriver->addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(1, 1), baseBufferSize);
423                kernel::Kernel * icgrepK = mGrepDriver->addKernelInstance<kernel::ICGrepKernel>(idb, mREs[i], externalStreamNames);
424                mGrepDriver->makeKernelCall(icgrepK, icgrepInputSets, {MatchResults});
425                MatchResultsBufs[i] = MatchResults;
426            }
427        }
428    }
429
    // Post-processing of the match streams.
    // NOTE(review): indexing element 0 assumes at least one RE was supplied.
430    StreamSetBuffer * MergedResults = MatchResultsBufs[0];
    // With several REs, combine the per-RE results through a StreamsMerge kernel.
431    if (mREs.size() > 1) {
432        MergedResults = mGrepDriver->addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(1, 1), baseBufferSize);
433        kernel::Kernel * streamsMergeK = mGrepDriver->addKernelInstance<kernel::StreamsMerge>(idb, 1, mREs.size());
434        mGrepDriver->makeKernelCall(streamsMergeK, MatchResultsBufs, {MergedResults});
435    }
436    StreamSetBuffer * Matches = MergedResults;
    // Optional stages, applied in this order: MatchedLinesKernel,
    // InvertMatchesKernel, UntilNkernel (initialised with mMaxCount).
437    if (mMoveMatchesToEOL) {
438        StreamSetBuffer * OriginalMatches = Matches;
439        kernel::Kernel * matchedLinesK = mGrepDriver->addKernelInstance<kernel::MatchedLinesKernel>(idb);
440        Matches = mGrepDriver->addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(1, 1), baseBufferSize);
441        mGrepDriver->makeKernelCall(matchedLinesK, {OriginalMatches, LineBreakStream}, {Matches});
442    }
443    if (mInvertMatches) {
444        kernel::Kernel * invertK = mGrepDriver->addKernelInstance<kernel::InvertMatchesKernel>(idb);
445        StreamSetBuffer * OriginalMatches = Matches;
446        Matches = mGrepDriver->addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(1, 1), baseBufferSize);
447        mGrepDriver->makeKernelCall(invertK, {OriginalMatches, LineBreakStream}, {Matches});
448    }
449    if (mMaxCount > 0) {
450        kernel::Kernel * untilK = mGrepDriver->addKernelInstance<kernel::UntilNkernel>(idb);
451        untilK->setInitialArguments({idb->getSize(mMaxCount)});
452        StreamSetBuffer * const AllMatches = Matches;
453        Matches = mGrepDriver->addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(1, 1), baseBufferSize);
454        mGrepDriver->makeKernelCall(untilK, {AllMatches}, {Matches});
455    }
456
457    return std::pair<StreamSetBuffer *, StreamSetBuffer *>(LineBreakStream, Matches);
458}
459
460// The QuietMode, MatchOnly and CountOnly engines share a common code generation main function,
461// which returns a count of the matches found (possibly subject to a MaxCount).
462//
463
464void GrepEngine::grepCodeGen() {
465    auto & idb = mGrepDriver->getBuilder();
466    Module * M = idb->getModule();
467
468    const unsigned encodingBits = 8;
469
470    Function * mainFunc = cast<Function>(M->getOrInsertFunction("Main", idb->getInt64Ty(), idb->getInt8Ty(), idb->getInt32Ty(), idb->getIntAddrTy(), nullptr));
471    mainFunc->setCallingConv(CallingConv::C);
472    idb->SetInsertPoint(BasicBlock::Create(M->getContext(), "entry", mainFunc, 0));
473    auto args = mainFunc->arg_begin();
474
475    Value * const useMMap = &*(args++);
476    useMMap->setName("useMMap");
477    Value * const fileDescriptor = &*(args++);
478    fileDescriptor->setName("fileDescriptor");
479    Value * call_back_object = &*(args++);
480    call_back_object->setName("call_back_object");
481
482    StreamSetBuffer * const ByteStream = mGrepDriver->addBuffer<ExternalBuffer>(idb, idb->getStreamSetTy(1, encodingBits));
483    kernel::Kernel * const sourceK = mGrepDriver->addKernelInstance<kernel::FDSourceKernel>(idb);
484    sourceK->setInitialArguments({useMMap, fileDescriptor});
485    mGrepDriver->makeKernelCall(sourceK, {}, {ByteStream});
486
487    StreamSetBuffer * LineBreakStream;
488    StreamSetBuffer * Matches;
489    std::tie(LineBreakStream, Matches) = grepPipeline(ByteStream, call_back_object);
490
491    kernel::Kernel * matchCountK = mGrepDriver->addKernelInstance<kernel::PopcountKernel>(idb);
492    mGrepDriver->makeKernelCall(matchCountK, {Matches}, {});
493    mGrepDriver->generatePipelineIR();
494    idb->setKernel(matchCountK);
495    Value * matchedLineCount = idb->getAccumulator("countResult");
496    matchedLineCount = idb->CreateZExt(matchedLineCount, idb->getInt64Ty());
497    mGrepDriver->deallocateBuffers();
498    idb->CreateRet(matchedLineCount);
499   
500    mGrepDriver->finalizeObject();
501}
502
503//
504//  Default Report Match:  lines are emitted with whatever line terminators are found in the
505//  input.  However, if the final line is not terminated, a new line is appended.
506//
507void EmitMatch::accumulate_match (const size_t lineNum, char * line_start, char * line_end) {
508    mResultStr << mLinePrefix;
509    if (mShowLineNumbers) {
510        // Internally line numbers are counted from 0.  For display, adjust
511        // the line number so that lines are numbered from 1.
512        if (mInitialTab) {
513            mResultStr << lineNum+1 << "\t:";
514        }
515        else {
516            mResultStr << lineNum+1 << ":";
517        }
518    }
519    size_t bytes = line_end - line_start + 1;
520    mResultStr.write(line_start, bytes);
521    mLineCount++;
522    unsigned last_byte = *line_end;
523    mTerminated = (last_byte >= 0x0A) && (last_byte <= 0x0D);
524    if (LLVM_UNLIKELY(!mTerminated)) {
525        if (last_byte == 0x85) {  //  Possible NEL terminator.
526            mTerminated = (bytes >= 2) && (static_cast<unsigned>(line_end[-1]) == 0xC2);
527        }
528        else {
529            // Possible LS or PS terminators.
530            mTerminated = (bytes >= 3) && (static_cast<unsigned>(line_end[-2]) == 0xE2)
531                                       && (static_cast<unsigned>(line_end[-1]) == 0x80)
532                                       && ((last_byte == 0xA8) || (last_byte == 0xA9));
533        }
534    }
535}
536
537void EmitMatch::finalize_match(char * buffer_end) {
538    if (!mTerminated) mResultStr << "\n";
539}
540
541void EmitMatchesEngine::grepCodeGen() {
542    auto & idb = mGrepDriver->getBuilder();
543    Module * M = idb->getModule();
544
545    const unsigned encodingBits = 8;
546
547    Function * mainFunc = cast<Function>(M->getOrInsertFunction("Main", idb->getInt64Ty(), idb->getInt8Ty(), idb->getInt32Ty(), idb->getIntAddrTy(), nullptr));
548    mainFunc->setCallingConv(CallingConv::C);
549    idb->SetInsertPoint(BasicBlock::Create(M->getContext(), "entry", mainFunc, 0));
550    auto args = mainFunc->arg_begin();
551
552    Value * const useMMap = &*(args++);
553    useMMap->setName("useMMap");
554    Value * const fileDescriptor = &*(args++);
555    fileDescriptor->setName("fileDescriptor");
556    Value * match_accumulator = &*(args++);
557    match_accumulator->setName("match_accumulator");
558
559    StreamSetBuffer * ByteStream = mGrepDriver->addBuffer<ExternalBuffer>(idb, idb->getStreamSetTy(1, encodingBits));
560    kernel::Kernel * sourceK = mGrepDriver->addKernelInstance<kernel::FDSourceKernel>(idb);
561    sourceK->setInitialArguments({useMMap, fileDescriptor});
562    mGrepDriver->makeKernelCall(sourceK, {}, {ByteStream});
563
564    StreamSetBuffer * LineBreakStream;
565    StreamSetBuffer * Matches;
566    std::tie(LineBreakStream, Matches) = grepPipeline(ByteStream, match_accumulator);
567
568    kernel::Kernel * scanMatchK = mGrepDriver->addKernelInstance<kernel::ScanMatchKernel>(idb);
569    scanMatchK->setInitialArguments({match_accumulator});
570    mGrepDriver->makeKernelCall(scanMatchK, {Matches, LineBreakStream, ByteStream}, {});
571    mGrepDriver->LinkFunction(*scanMatchK, "accumulate_match_wrapper", &accumulate_match_wrapper);
572    mGrepDriver->LinkFunction(*scanMatchK, "finalize_match_wrapper", &finalize_match_wrapper);
573
574    mGrepDriver->generatePipelineIR();
575    mGrepDriver->deallocateBuffers();
576    idb->CreateRet(idb->getInt64(0));
577    mGrepDriver->finalizeObject();
578}
579
580
581//
582//  The doGrep methods apply a GrepEngine to a single file, processing the results
583//  differently based on the engine type.
584
585bool canMMap(const std::string & fileName) {
586    if (fileName == "-") return false;
587    namespace fs = boost::filesystem;
588    fs::path p(fileName);
589    boost::system::error_code errc;
590    fs::file_status s = fs::status(p, errc);
591    return !errc && is_regular_file(s);
592}
593
594
595uint64_t GrepEngine::doGrep(const std::string & fileName, std::ostringstream & strm) {
596    typedef uint64_t (*GrepFunctionType)(bool useMMap, int32_t fileDescriptor, intptr_t callback_addr);
597    bool useMMap = mPreferMMap && canMMap(fileName);
598
599    auto f = reinterpret_cast<GrepFunctionType>(mGrepDriver->getMain());
600
601    int32_t fileDescriptor = openFile(fileName, strm);
602    if (fileDescriptor == -1) return 0;
603    GrepCallBackObject handler;
604    uint64_t grepResult = f(useMMap, fileDescriptor, reinterpret_cast<intptr_t>(&handler));
605    close(fileDescriptor);
606    if (handler.binaryFileSignalled()) {
607        return 0;
608    }
609    else {
610        showResult(grepResult, fileName, strm);
611        return grepResult;
612    }
613}
614
615std::string GrepEngine::linePrefix(std::string fileName) {
616    if (!mShowFileNames) return "";
617    if (fileName == "-") {
618        return mStdinLabel + mFileSuffix;
619    }
620    else {
621        return fileName + mFileSuffix;
622    }
623}
624
625// Default: do not show anything
626void GrepEngine::showResult(uint64_t grepResult, const std::string & fileName, std::ostringstream & strm) {
627}
628   
629void CountOnlyEngine::showResult(uint64_t grepResult, const std::string & fileName, std::ostringstream & strm) {
630    if (mShowFileNames) strm << linePrefix(fileName);
631    strm << grepResult << "\n";
632}
633   
634void MatchOnlyEngine::showResult(uint64_t grepResult, const std::string & fileName, std::ostringstream & strm) {
635    if (grepResult == mRequiredCount) {
636       strm << linePrefix(fileName);
637    }
638}
639
640uint64_t EmitMatchesEngine::doGrep(const std::string & fileName, std::ostringstream & strm) {
641    typedef uint64_t (*GrepFunctionType)(bool useMMap, int32_t fileDescriptor, intptr_t accum_addr);
642    bool useMMap = mPreferMMap && canMMap(fileName);
643    auto f = reinterpret_cast<GrepFunctionType>(mGrepDriver->getMain());
644    int32_t fileDescriptor = openFile(fileName, strm);
645    if (fileDescriptor == -1) return 0;
646    EmitMatch accum(linePrefix(fileName), mShowLineNumbers, mInitialTab, strm);
647    f(useMMap, fileDescriptor, reinterpret_cast<intptr_t>(&accum));
648    close(fileDescriptor);
649    if (accum.binaryFileSignalled()) {
650        accum.mResultStr.clear();
651    }
652    if (accum.mLineCount > 0) grepMatchFound = true;
653    return accum.mLineCount;
654}
655
656// Open a file and return its file desciptor.
657int32_t GrepEngine::openFile(const std::string & fileName, std::ostringstream & msgstrm) {
658    if (fileName == "-") {
659        return STDIN_FILENO;
660    }
661    else {
662        struct stat sb;
663        int32_t fileDescriptor = open(fileName.c_str(), O_RDONLY);
664        if (LLVM_UNLIKELY(fileDescriptor == -1)) {
665            if (!mSuppressFileMessages) {
666                if (errno == EACCES) {
667                    msgstrm << "icgrep: " << fileName << ": Permission denied.\n";
668                }
669                else if (errno == ENOENT) {
670                    msgstrm << "icgrep: " << fileName << ": No such file.\n";
671                }
672                else {
673                    msgstrm << "icgrep: " << fileName << ": Failed.\n";
674                }
675            }
676            return fileDescriptor;
677        }
678        if (stat(fileName.c_str(), &sb) == 0 && S_ISDIR(sb.st_mode)) {
679            if (!mSuppressFileMessages) {
680                msgstrm << "icgrep: " << fileName << ": Is a directory.\n";
681            }
682            close(fileDescriptor);
683            return -1;
684        }
685        return fileDescriptor;
686    }
687}
688
689// The process of searching a group of files may use a sequential or a task
690// parallel approach.
691
692void * DoGrepThreadFunction(void *args) {
693    return reinterpret_cast<GrepEngine *>(args)->DoGrepThreadMethod();
694}
695
696bool GrepEngine::searchAllFiles() {
697    const unsigned numOfThreads = std::min(static_cast<unsigned>(Threads), static_cast<unsigned>(inputPaths.size()));
698    std::vector<pthread_t> threads(numOfThreads);
699
700    for(unsigned long i = 1; i < numOfThreads; ++i) {
701        const int rc = pthread_create(&threads[i], nullptr, DoGrepThreadFunction, (void *)this);
702        if (rc) {
703            llvm::report_fatal_error("Failed to create thread: code " + std::to_string(rc));
704        }
705    }
706    // Main thread also does the work;
707    DoGrepThreadMethod();
708    for(unsigned i = 1; i < numOfThreads; ++i) {
709        void * status = nullptr;
710        const int rc = pthread_join(threads[i], &status);
711        if (rc) {
712            llvm::report_fatal_error("Failed to join thread: code " + std::to_string(rc));
713        }
714    }
715    return grepMatchFound;
716}
717
718
719// DoGrep thread function.
720void * GrepEngine::DoGrepThreadMethod() {
721
722    unsigned fileIdx = mNextFileToGrep++;
723    while (fileIdx < inputPaths.size()) {
724        if (codegen::DebugOptionIsSet(codegen::TraceCounts)) {
725            errs() << "Tracing " << inputPaths[fileIdx].string() << "\n";
726        }
727        const auto grepResult = doGrep(inputPaths[fileIdx].string(), mResultStrs[fileIdx]);
728        mFileStatus[fileIdx] = FileStatus::GrepComplete;
729        if (grepResult > 0) {
730            grepMatchFound = true;
731        }
732        if ((mEngineKind == EngineKind::QuietMode) && grepMatchFound) {
733            if (pthread_self() != mEngineThread) {
734                pthread_exit(nullptr);
735            }
736            return nullptr;
737        }
738        fileIdx = mNextFileToGrep++;
739    }
740
741    unsigned printIdx = mNextFileToPrint++;
742    while (printIdx < inputPaths.size()) {
743        const bool readyToPrint = ((printIdx == 0) || (mFileStatus[printIdx - 1] == FileStatus::PrintComplete)) && (mFileStatus[printIdx] == FileStatus::GrepComplete);
744        if (readyToPrint) {
745            const auto output = mResultStrs[printIdx].str();
746            if (!output.empty()) {
747                llvm::outs() << output;
748            }
749            mFileStatus[printIdx] = FileStatus::PrintComplete;
750            printIdx = mNextFileToPrint++;
751        } else {
752            mGrepDriver->performIncrementalCacheCleanupStep();
753        }
754        sched_yield();
755    }
756
757    if (pthread_self() != mEngineThread) {
758        pthread_exit(nullptr);
759    } else {
760        // Always perform one final cache cleanup step.
761        mGrepDriver->performIncrementalCacheCleanupStep();
762        if (mGrepStdIn) {
763            std::ostringstream s;
764            const auto grepResult = doGrep("-", s);
765            llvm::outs() << s.str();
766            if (grepResult) grepMatchFound = true;
767        }
768    }
769    return nullptr;
770}
771
772   
773   
774InternalSearchEngine::InternalSearchEngine() :
775    mGrepRecordBreak(GrepRecordBreakKind::LF),
776    mCaseInsensitive(false),
777    mGrepDriver(make_unique<ParabixDriver>("InternalEngine")) {}
778   
779void InternalSearchEngine::grepCodeGen(re::RE * matchingRE, re::RE * excludedRE, MatchAccumulator * accum) {
780    auto & idb = mGrepDriver->getBuilder();
781    Module * M = idb->getModule();
782   
783    mSaveSegmentPipelineParallel = codegen::SegmentPipelineParallel;
784    codegen::SegmentPipelineParallel = false;
785    const unsigned segmentSize = codegen::BufferSegments * codegen::SegmentSize * codegen::ThreadNum;
786   
787    re::CC * breakCC = nullptr;
788    if (mGrepRecordBreak == GrepRecordBreakKind::Null) {
789        breakCC = re::makeByte(0x0);
790    } else {// if (mGrepRecordBreak == GrepRecordBreakKind::LF)
791        breakCC = re::makeByte(0x0A);
792    }
793    bool excludeNothing = (excludedRE == nullptr) || (isa<re::Alt>(excludedRE) && cast<re::Alt>(excludedRE)->empty());
794    bool matchAllLines = (matchingRE == nullptr) || isa<re::End>(matchingRE);
795    if (!matchAllLines) {
796        matchingRE = resolveCaseInsensitiveMode(matchingRE, mCaseInsensitive);
797        matchingRE = regular_expression_passes(matchingRE);
798        matchingRE = re::exclude_CC(matchingRE, breakCC);
799        matchingRE = resolveAnchors(matchingRE, breakCC);
800        matchingRE = toUTF8(matchingRE);
801    }
802    if (!excludeNothing) {
803        excludedRE = resolveCaseInsensitiveMode(excludedRE, mCaseInsensitive);
804        excludedRE = regular_expression_passes(excludedRE);
805        excludedRE = re::exclude_CC(excludedRE, breakCC);
806        excludedRE = resolveAnchors(excludedRE, breakCC);
807        excludedRE = toUTF8(excludedRE);
808    }
809    Function * mainFunc = cast<Function>(M->getOrInsertFunction("Main", idb->getVoidTy(), idb->getInt8PtrTy(), idb->getSizeTy(), nullptr));
810    mainFunc->setCallingConv(CallingConv::C);
811    auto args = mainFunc->arg_begin();
812    Value * const buffer = &*(args++);
813    buffer->setName("buffer");
814    Value * length = &*(args++);
815    length->setName("length");
816   
817    idb->SetInsertPoint(BasicBlock::Create(M->getContext(), "entry", mainFunc, 0));
818    StreamSetBuffer * ByteStream = mGrepDriver->addBuffer<ExternalBuffer>(idb, idb->getStreamSetTy(1, 8));
819    kernel::Kernel * sourceK = mGrepDriver->addKernelInstance<kernel::MemorySourceKernel>(idb);
820    sourceK->setInitialArguments({buffer, length});
821    mGrepDriver->makeKernelCall(sourceK, {}, {ByteStream});
822    StreamSetBuffer * RecordBreakStream = mGrepDriver->addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(1, 1), segmentSize);
823    std::string RBname = (mGrepRecordBreak == GrepRecordBreakKind::Null) ? "Null" : "LF";
824
825   
826    StreamSetBuffer * BasisBits = nullptr;
827   
828    if (matchAllLines && excludeNothing) {
829        kernel::Kernel * breakK = mGrepDriver->addKernelInstance<kernel::DirectCharacterClassKernelBuilder>(idb, RBname, std::vector<re::CC *>{breakCC});
830        mGrepDriver->makeKernelCall(breakK, {ByteStream}, {RecordBreakStream});
831    } else {
832        BasisBits = mGrepDriver->addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(8, 1), segmentSize);
833        kernel::Kernel * s2pk = mGrepDriver->addKernelInstance<kernel::S2PKernel>(idb);
834        mGrepDriver->makeKernelCall(s2pk, {ByteStream}, {BasisBits});
835       
836        kernel::Kernel * breakK = mGrepDriver->addKernelInstance<kernel::ParabixCharacterClassKernelBuilder>(idb, RBname, std::vector<re::CC *>{breakCC}, 8);
837        mGrepDriver->makeKernelCall(breakK, {BasisBits}, {RecordBreakStream});
838    }
839   
840    std::vector<std::string> externalStreamNames;
841    StreamSetBuffer * MatchingRecords = nullptr;
842    if (matchAllLines) {
843        MatchingRecords = RecordBreakStream;
844    } else {
845        StreamSetBuffer * MatchResults = mGrepDriver->addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(1, 1), segmentSize);
846        kernel::Kernel * includeK = mGrepDriver->addKernelInstance<kernel::ICGrepKernel>(idb, matchingRE, externalStreamNames);
847        mGrepDriver->makeKernelCall(includeK, {BasisBits}, {MatchResults});
848        MatchingRecords = mGrepDriver->addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(1, 1), segmentSize);
849        kernel::Kernel * matchedLinesK = mGrepDriver->addKernelInstance<kernel::MatchedLinesKernel>(idb);
850        mGrepDriver->makeKernelCall(matchedLinesK, {MatchResults, RecordBreakStream}, {MatchingRecords});
851    }
852    if (!excludeNothing) {
853        StreamSetBuffer * ExcludedResults = mGrepDriver->addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(1, 1), segmentSize);
854        kernel::Kernel * excludeK = mGrepDriver->addKernelInstance<kernel::ICGrepKernel>(idb, excludedRE, externalStreamNames);
855        mGrepDriver->makeKernelCall(excludeK, {BasisBits}, {ExcludedResults});
856        StreamSetBuffer * ExcludedRecords = mGrepDriver->addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(1, 1), segmentSize);
857        kernel::Kernel * matchedLinesK = mGrepDriver->addKernelInstance<kernel::MatchedLinesKernel>(idb);
858        mGrepDriver->makeKernelCall(matchedLinesK, {ExcludedResults, RecordBreakStream}, {ExcludedRecords});
859
860        kernel::Kernel * invertK = mGrepDriver->addKernelInstance<kernel::InvertMatchesKernel>(idb);
861        if (!matchAllLines) {
862            StreamSetBuffer * nonExcluded = mGrepDriver->addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(1, 1), segmentSize);
863            mGrepDriver->makeKernelCall(invertK, {ExcludedRecords, RecordBreakStream}, {nonExcluded});
864            StreamSetBuffer * included = MatchingRecords;
865            MatchingRecords = mGrepDriver->addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(1, 1), segmentSize);
866            kernel::Kernel * streamsIntersectK = mGrepDriver->addKernelInstance<kernel::StreamsIntersect>(idb, 1, 2);
867            mGrepDriver->makeKernelCall(streamsIntersectK, {included, nonExcluded}, {MatchingRecords});
868        }
869        else {
870            MatchingRecords = mGrepDriver->addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(1, 1), segmentSize);
871            mGrepDriver->makeKernelCall(invertK, {ExcludedRecords, RecordBreakStream}, {MatchingRecords});
872        }
873    }
874    kernel::Kernel * scanMatchK = mGrepDriver->addKernelInstance<kernel::ScanMatchKernel>(idb);
875    scanMatchK->setInitialArguments({ConstantInt::get(idb->getIntAddrTy(), reinterpret_cast<intptr_t>(accum))});
876    mGrepDriver->makeKernelCall(scanMatchK, {MatchingRecords, RecordBreakStream, ByteStream}, {});
877    mGrepDriver->LinkFunction(*scanMatchK, "accumulate_match_wrapper", &accumulate_match_wrapper);
878    mGrepDriver->LinkFunction(*scanMatchK, "finalize_match_wrapper", &finalize_match_wrapper);
879    mGrepDriver->generatePipelineIR();
880    mGrepDriver->deallocateBuffers();
881    idb->CreateRetVoid();
882    mGrepDriver->finalizeObject();
883}
884
885void InternalSearchEngine::doGrep(const char * search_buffer, size_t bufferLength) {
886    typedef void (*GrepFunctionType)(const char * buffer, const size_t length);
887    auto f = reinterpret_cast<GrepFunctionType>(mGrepDriver->getMain());
888    f(search_buffer, bufferLength);
889    codegen::SegmentPipelineParallel = mSaveSegmentPipelineParallel;
890}
891
892GrepEngine::~GrepEngine() { }
893
894InternalSearchEngine::~InternalSearchEngine() { }
895
896}
Note: See TracBrowser for help on using the repository browser.