source: icGREP/icgrep-devel/icgrep/lz4_grep.cpp @ 6173

Last change on this file since 6173 was 6150, checked in by xwa163, 11 months ago
  1. Remove LZParabix-related code
  2. Enable multiplexing for the LZ4 ScanMatch pipeline
  3. Some minor bug fixes
File size: 4.3 KB
Line 
1/*
2 *  Copyright (c) 2017 International Characters.
3 *  This software is licensed to the public under the Open Software License 3.0.
4 *  icgrep is a trademark of International Characters.
5 */
6
7
8#include <llvm/IR/Module.h>
9#include <llvm/Linker/Linker.h>
10#include <llvm/Support/PrettyStackTrace.h>
11#include <llvm/Support/Signals.h>
12#include <llvm/Support/ManagedStatic.h>
13#include <IR_Gen/idisa_target.h>
14#include <boost/filesystem.hpp>
15#include <boost/iostreams/device/mapped_file.hpp>
16
17#include <lz4/lz4_frame_decoder.h>
18#include <cc/cc_compiler.h>
19#include <toolchain/toolchain.h>
20#include <kernels/cc_kernel.h>
21#include <kernels/streamset.h>
22#include <kernels/s2p_kernel.h>
23#include <kernels/kernel_builder.h>
24#include <toolchain/cpudriver.h>
25
26#include <iostream>
27#include <lz4/grep/lz4_grep_base_generator.h>
28#include <lz4/grep/lz4_grep_bitstream_generator.h>
29#include <lz4/grep/lz4_grep_bytestream_generator.h>
30#include <lz4/grep/lz4_grep_swizzle_generator.h>
31
32
33
34#include <re/re_alt.h>
35#include <re/re_start.h>
36#include <re/re_end.h>
37#include <re/re_utility.h>
38#include <re/re_toolchain.h>
39#include <pablo/pablo_toolchain.h>
40#include <llvm/Support/raw_ostream.h>
41
42namespace re { class CC; }
43
44using namespace llvm;
45using namespace parabix;
46using namespace kernel;
47
// Primary command-line options. The regex and the input file are both
// required positional arguments, given in that order. The boolean switches
// all default to false:
//   - count-only selects the count-only pipeline in main() instead of the
//     scan-match pipeline;
//   - enable-multiplexing and utf8-CC are passed through unchanged to
//     whichever pipeline generator main() builds.
// NOTE(review): the category description "lz4d options" looks like it was
// carried over from the lz4d tool - confirm whether that wording is intended.
48static cl::OptionCategory lz4GrepFlags("Command Flags", "lz4d options");
49static cl::opt<std::string> regexString(cl::Positional, cl::desc("<regex>"), cl::Required, cl::cat(lz4GrepFlags));
50static cl::opt<std::string> inputFile(cl::Positional, cl::desc("<input file>"), cl::Required, cl::cat(lz4GrepFlags));
51static cl::opt<bool> countOnly("count-only", cl::desc("Only count the match result"), cl::init(false), cl::cat(lz4GrepFlags));
52static cl::opt<bool> enableMultiplexing("enable-multiplexing", cl::desc("Enable CC multiplexing."), cl::init(false), cl::cat(lz4GrepFlags));
53static cl::opt<bool> utf8CC("utf8-CC", cl::desc("Use UTF-8 Character Class."), cl::init(false), cl::cat(lz4GrepFlags));
54
// Debug options that pick the decompression strategy. main() tests
// swizzled-decompression first, then bitstream-decompression; when neither is
// set it falls back to the byte-stream generator. If both are set, the
// swizzled generator is the one used.
55static cl::OptionCategory lz4GrepDebugFlags("LZ4 Grep Debug Flags", "lz4d debug options");
56static cl::opt<bool> swizzledDecompression("swizzled-decompression", cl::desc("Use swizzle approach for decompression"), cl::init(false), cl::cat(lz4GrepDebugFlags));
57static cl::opt<bool> bitStreamDecompression("bitstream-decompression", cl::desc("Use bit stream approach for decompression"), cl::init(false), cl::cat(lz4GrepDebugFlags));
58
59
60int main(int argc, char *argv[]) {
61    // This boilerplate provides convenient stack traces and clean LLVM exit
62    // handling. It also initializes the built in support for convenient
63    // command line option handling.
64    sys::PrintStackTraceOnErrorSignal(argv[0]);
65    llvm::PrettyStackTraceProgram X(argc, argv);
66    llvm_shutdown_obj shutdown;
67    codegen::ParseCommandLineOptions(argc, argv, {&lz4GrepFlags, &lz4GrepDebugFlags, codegen::codegen_flags()});
68    std::string fileName = inputFile;
69    LZ4FrameDecoder lz4Frame(fileName);
70    if (!lz4Frame.isValid()) {
71        errs() << "Invalid LZ4 file.\n";
72        return -1;
73    }
74
75    boost::iostreams::mapped_file_source mappedFile;
76    // Since mmap offset has to be multiples of pages, we can't use it to skip headers.
77    mappedFile.open(fileName , lz4Frame.getBlocksLength() + lz4Frame.getBlocksStart());
78    //char *fileBuffer = const_cast<char *>(mappedFile.data()) + lz4Frame.getBlocksStart();
79    char *fileBuffer = const_cast<char *>(mappedFile.data());
80    re::RE * re_ast = re::RE_Parser::parse(regexString, re::MULTILINE_MODE_FLAG);
81
82    LZ4GrepBaseGenerator* g = nullptr;
83    if (swizzledDecompression) {
84        g = new LZ4GrepSwizzleGenerator();
85    } else if (bitStreamDecompression) {
86        g = new LZ4GrepBitStreamGenerator();
87    } else {
88        g = new LZ4GrepByteStreamGenerator();
89    }
90
91    if (countOnly) {
92        g->generateCountOnlyGrepPipeline(re_ast, enableMultiplexing, utf8CC);
93        auto main = g->getCountOnlyGrepMainFunction();
94        uint64_t countResult = main(fileBuffer, lz4Frame.getBlocksStart(), lz4Frame.getBlocksStart() + lz4Frame.getBlocksLength(), lz4Frame.hasBlockChecksum());
95        llvm::outs() << countResult << "\n";
96    } else {
97        g->generateScanMatchGrepPipeline(re_ast, enableMultiplexing, utf8CC);
98        g->invokeScanMatchGrep(fileBuffer, lz4Frame.getBlocksStart(), lz4Frame.getBlocksStart() + lz4Frame.getBlocksLength(), lz4Frame.hasBlockChecksum());
99    }
100
101    mappedFile.close();
102    delete g;
103
104    return 0;
105}
Note: See TracBrowser for help on using the repository browser.