source: icGREP/icgrep-devel/icgrep/lz4_grep.cpp @ 6184

Last change on this file since 6184 was 6184, checked in by nmedfort, 12 months ago

Initial version of PipelineKernel + revised StreamSet model.

File size: 4.4 KB
Line 
1/*
2 *  Copyright (c) 2017 International Characters.
3 *  This software is licensed to the public under the Open Software License 3.0.
4 *  icgrep is a trademark of International Characters.
5 */
6
7
8#include <llvm/IR/Module.h>
9#include <llvm/Linker/Linker.h>
10#include <llvm/Support/PrettyStackTrace.h>
11#include <llvm/Support/Signals.h>
12#include <llvm/Support/ManagedStatic.h>
13#include <IR_Gen/idisa_target.h>
14#include <boost/filesystem.hpp>
15#include <boost/iostreams/device/mapped_file.hpp>
16
17#include <lz4/lz4_frame_decoder.h>
18#include <cc/cc_compiler.h>
19#include <toolchain/toolchain.h>
20#include <kernels/cc_kernel.h>
21#include <kernels/streamset.h>
22#include <kernels/s2p_kernel.h>
23#include <kernels/kernel_builder.h>
24#include <toolchain/cpudriver.h>
25
26#include <iostream>
27#include <lz4/grep/lz4_grep_base_generator.h>
28#include <lz4/grep/lz4_grep_bitstream_generator.h>
29#include <lz4/grep/lz4_grep_bytestream_generator.h>
30#include <lz4/grep/lz4_grep_swizzle_generator.h>
31
32
33
34#include <re/re_alt.h>
35#include <re/re_start.h>
36#include <re/re_end.h>
37#include <re/re_utility.h>
38#include <re/re_toolchain.h>
39#include <pablo/pablo_toolchain.h>
40#include <llvm/Support/raw_ostream.h>
41#include <memory>
42
43namespace re { class CC; }
44
45using namespace llvm;
46using namespace kernel;
47
48static cl::OptionCategory lz4GrepFlags("Command Flags", "lz4d options");
49static cl::opt<std::string> regexString(cl::Positional, cl::desc("<regex>"), cl::Required, cl::cat(lz4GrepFlags));
50static cl::opt<std::string> inputFile(cl::Positional, cl::desc("<input file>"), cl::Required, cl::cat(lz4GrepFlags));
51static cl::opt<bool> countOnly("count-only", cl::desc("Only count the match result"), cl::init(false), cl::cat(lz4GrepFlags));
52static cl::opt<bool> enableMultiplexing("enable-multiplexing", cl::desc("Enable CC multiplexing."), cl::init(false), cl::cat(lz4GrepFlags));
53static cl::opt<bool> utf8CC("utf8-CC", cl::desc("Use UTF-8 Character Class."), cl::init(false), cl::cat(lz4GrepFlags));
54
55static cl::OptionCategory lz4GrepDebugFlags("LZ4 Grep Debug Flags", "lz4d debug options");
56static cl::opt<bool> swizzledDecompression("swizzled-decompression", cl::desc("Use swizzle approach for decompression"), cl::init(false), cl::cat(lz4GrepDebugFlags));
57static cl::opt<bool> bitStreamDecompression("bitstream-decompression", cl::desc("Use bit stream approach for decompression"), cl::init(false), cl::cat(lz4GrepDebugFlags));
58
59
60int main(int argc, char *argv[]) {
61    // This boilerplate provides convenient stack traces and clean LLVM exit
62    // handling. It also initializes the built in support for convenient
63    // command line option handling.
64    sys::PrintStackTraceOnErrorSignal(argv[0]);
65    llvm::PrettyStackTraceProgram X(argc, argv);
66    llvm_shutdown_obj shutdown;
67    codegen::ParseCommandLineOptions(argc, argv, {&lz4GrepFlags, &lz4GrepDebugFlags, codegen::codegen_flags()});
68    std::string fileName = inputFile;
69    LZ4FrameDecoder lz4Frame(fileName);
70    if (!lz4Frame.isValid()) {
71        errs() << "Invalid LZ4 file.\n";
72        return -1;
73    }
74
75    boost::iostreams::mapped_file_source mappedFile;
76    // Since mmap offset has to be multiples of pages, we can't use it to skip headers.
77    mappedFile.open(fileName , lz4Frame.getBlocksLength() + lz4Frame.getBlocksStart());
78    //char *fileBuffer = const_cast<char *>(mappedFile.data()) + lz4Frame.getBlocksStart();
79    char *fileBuffer = const_cast<char *>(mappedFile.data());
80    re::RE * re_ast = re::RE_Parser::parse(regexString, re::MULTILINE_MODE_FLAG);
81
82    const auto mode = (countOnly ? LZ4GrepBaseGenerator::CountOnly : LZ4GrepBaseGenerator::Match);
83
84    std::unique_ptr<LZ4GrepBaseGenerator> g;
85    if (swizzledDecompression) {
86        g.reset(new LZ4GrepSwizzleGenerator(mode));
87    } else if (bitStreamDecompression) {
88        g.reset(new LZ4GrepBitStreamGenerator(mode));
89    } else {
90        g.reset(new LZ4GrepByteStreamGenerator(mode));
91    }
92
93    if (countOnly) {
94        g->generateCountOnlyGrepPipeline(re_ast, enableMultiplexing, utf8CC);
95        auto main = g->getCountOnlyGrepMainFunction();
96        uint64_t countResult = main(fileBuffer, lz4Frame.getBlocksStart(), lz4Frame.getBlocksStart() + lz4Frame.getBlocksLength(), lz4Frame.hasBlockChecksum());
97        llvm::outs() << countResult << "\n";
98    } else {
99        g->generateScanMatchGrepPipeline(re_ast, enableMultiplexing, utf8CC);
100        g->invokeScanMatchGrep(fileBuffer, lz4Frame.getBlocksStart(), lz4Frame.getBlocksStart() + lz4Frame.getBlocksLength(), lz4Frame.hasBlockChecksum());
101    }
102
103    mappedFile.close();
104
105    return 0;
106}
Note: See TracBrowser for help on using the repository browser.