source: icGREP/icgrep-devel/icgrep/lz4d.cpp @ 6161

Last change on this file since 6161 was 6137, checked in by xwa163, 14 months ago
  1. LZ4 ScanMatch pipeline
  2. Refactor LZ4 Generator
  3. Adjust some naming
File size: 6.6 KB
Line 
1/*
2 *  Copyright (c) 2017 International Characters.
3 *  This software is licensed to the public under the Open Software License 3.0.
4 *  icgrep is a trademark of International Characters.
5 */
6
7
8#include <llvm/IR/Module.h>
9#include <llvm/Linker/Linker.h>
10#include <llvm/Support/PrettyStackTrace.h>
11#include <llvm/Support/Signals.h>
12#include <llvm/Support/ManagedStatic.h>
13#include <toolchain/toolchain.h>
14
15#include <IR_Gen/idisa_target.h>
16#include <boost/filesystem.hpp>
17#include <boost/iostreams/device/mapped_file.hpp>
18
19#include <lz4/lz4_frame_decoder.h>
20#include <cc/alphabet.h>
21#include <cc/cc_compiler.h>
22#include <kernels/cc_kernel.h>
23#include <kernels/streamset.h>
24#include <kernels/s2p_kernel.h>
25#include <kernels/source_kernel.h>
26#include <kernels/stdout_kernel.h>
27#include <kernels/lz4/lz4_index_decoder.h>
28#include <kernels/lz4/lz4_bytestream_decoder.h>
29
30#include <kernels/kernel_builder.h>
31#include <toolchain/cpudriver.h>
32#include <iostream>
33#include <llvm/Support/raw_ostream.h>
34namespace re { class CC; }
35
36using namespace llvm;
37using namespace parabix;
38using namespace kernel;
39
40static cl::OptionCategory lz4dFlags("Command Flags", "lz4d options");
41static cl::opt<std::string> inputFile(cl::Positional, cl::desc("<input file>"), cl::Required, cl::cat(lz4dFlags));
42static cl::opt<std::string> outputFile(cl::Positional, cl::desc("<output file>"), cl::Required, cl::cat(lz4dFlags));
43static cl::opt<bool> overwriteOutput("f", cl::desc("Overwrite existing output file."), cl::init(false), cl::cat(lz4dFlags));
44
45typedef void (*MainFunctionType)(char * byte_data, size_t filesize, bool hasBlockChecksum);
46
47void generatePipeline(ParabixDriver & pxDriver) {
48    auto & iBuilder = pxDriver.getBuilder();
49    Module * M = iBuilder->getModule();
50
51    Type * const sizeTy = iBuilder->getSizeTy();
52    Type * const boolTy = iBuilder->getIntNTy(sizeof(bool) * 8);
53    Type * const voidTy = iBuilder->getVoidTy();
54    Type * const inputType = iBuilder->getInt8PtrTy();
55   
56    Function * const main = cast<Function>(M->getOrInsertFunction("Main", voidTy, inputType, sizeTy, boolTy, nullptr));
57    main->setCallingConv(CallingConv::C);
58    Function::arg_iterator args = main->arg_begin();
59    Value * const inputStream = &*(args++);
60    inputStream->setName("input");
61    Value * const fileSize = &*(args++);
62    fileSize->setName("fileSize");
63    Value * const hasBlockChecksum = &*(args++);
64    hasBlockChecksum->setName("hasBlockChecksum");
65
66    const unsigned segmentSize = codegen::SegmentSize;
67    const unsigned bufferSegments = codegen::BufferSegments * codegen::ThreadNum;
68    // Output buffer should be at least one whole LZ4 block (4MB) large in case of uncompressed blocks.
69    // And the size (in bytes) also needs to be a power of two.
70    const unsigned decompressBufBlocks = (4 * 1024 * 1024) / codegen::BlockSize;
71
72    iBuilder->SetInsertPoint(BasicBlock::Create(M->getContext(), "entry", main, 0));
73
74    StreamSetBuffer * const ByteStream = pxDriver.addBuffer<ExternalBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 8));
75    StreamSetBuffer * const BasisBits = pxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(8, 1), segmentSize * bufferSegments);
76    StreamSetBuffer * const Extenders = pxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 1), segmentSize * bufferSegments);
77    StreamSetBuffer * const LiteralIndexes = pxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(2, 32), segmentSize * bufferSegments);
78    StreamSetBuffer * const MatchIndexes = pxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(2, 32), segmentSize * bufferSegments);
79    StreamSetBuffer * const DecompressedByteStream = pxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 8), decompressBufBlocks);
80
81   
82    kernel::Kernel * sourceK = pxDriver.addKernelInstance<MemorySourceKernel>(iBuilder);
83    sourceK->setInitialArguments({inputStream, fileSize});
84    pxDriver.makeKernelCall(sourceK, {}, {ByteStream});
85
86    // Input stream is not aligned due to the offset.
87    Kernel * s2pk = pxDriver.addKernelInstance<S2PKernel>(iBuilder, cc::BitNumbering::LittleEndian, /*aligned = */ false);
88    pxDriver.makeKernelCall(s2pk, {ByteStream}, {BasisBits});
89   
90    Kernel * extenderK = pxDriver.addKernelInstance<ParabixCharacterClassKernelBuilder>(iBuilder, "extenders", std::vector<re::CC *>{re::makeCC(0xFF)}, 8);
91    pxDriver.makeKernelCall(extenderK, {BasisBits}, {Extenders});
92
93    Kernel * lz4iK = pxDriver.addKernelInstance<LZ4IndexDecoderKernel>(iBuilder);
94    lz4iK->setInitialArguments({iBuilder->CreateTrunc(hasBlockChecksum, iBuilder->getInt1Ty())});
95    pxDriver.makeKernelCall(lz4iK, {ByteStream, Extenders}, {LiteralIndexes, MatchIndexes});
96
97    Kernel * lz4bK = pxDriver.addKernelInstance<LZ4ByteStreamDecoderKernel>(iBuilder, decompressBufBlocks * codegen::BlockSize);
98    pxDriver.makeKernelCall(lz4bK, {LiteralIndexes, MatchIndexes, ByteStream}, {DecompressedByteStream});
99
100    Kernel * outK = pxDriver.addKernelInstance<FileSink>(iBuilder, 8);
101    outK->setInitialArguments({iBuilder->GetString(outputFile)});
102    pxDriver.makeKernelCall(outK, {DecompressedByteStream}, {});
103 
104    pxDriver.generatePipelineIR();
105
106    pxDriver.deallocateBuffers();
107
108    iBuilder->CreateRetVoid();
109 
110    pxDriver.finalizeObject();
111}
112
113int main(int argc, char *argv[]) {
114    // This boilerplate provides convenient stack traces and clean LLVM exit
115    // handling. It also initializes the built in support for convenient
116    // command line option handling.
117    sys::PrintStackTraceOnErrorSignal(argv[0]);
118    llvm::PrettyStackTraceProgram X(argc, argv);
119    llvm_shutdown_obj shutdown;
120    codegen::ParseCommandLineOptions(argc, argv, {&lz4dFlags, codegen::codegen_flags()});
121    std::string fileName = inputFile;
122    LZ4FrameDecoder lz4Frame(fileName);
123    if (!lz4Frame.isValid()) {
124        errs() << "Invalid LZ4 file.\n";
125        return -1;
126    }
127
128    if (boost::filesystem::exists(outputFile)) {
129        if (overwriteOutput) {
130            boost::filesystem::remove(outputFile);
131        } else {
132            errs() << outputFile + " existed. Use -f argument to overwrite.\n";
133            return -1;
134        }
135    }
136
137    boost::iostreams::mapped_file_source mappedFile;
138    // Since mmap offset has to be multiples of pages, we can't use it to skip headers.
139    mappedFile.open(fileName, lz4Frame.getBlocksLength() + lz4Frame.getBlocksStart());
140    char *fileBuffer = const_cast<char *>(mappedFile.data()) + lz4Frame.getBlocksStart();
141    ParabixDriver pxDriver("lz4d");
142    generatePipeline(pxDriver);
143    auto main = reinterpret_cast<MainFunctionType>(pxDriver.getMain());
144
145    main(fileBuffer, lz4Frame.getBlocksLength(), lz4Frame.hasBlockChecksum());
146
147    mappedFile.close();
148    return 0;
149}
Note: See TracBrowser for help on using the repository browser.