source: icGREP/icgrep-devel/icgrep/lz4d.cpp @ 6026

Last change on this file since 6026 was 6026, checked in by xwa163, 12 months ago
  1. Implement SwizzledMultiplePDEPkernel with the same logic as the new PDEPkernel, remove LZ4MultiplePDEPkernel, and improve performance
  2. Remove some unnecessary includes
  3. Add a prefix to some kernels
  4. Remove a legacy kernel
File size: 6.5 KB
Line 
1/*
2 *  Copyright (c) 2017 International Characters.
3 *  This software is licensed to the public under the Open Software License 3.0.
4 *  icgrep is a trademark of International Characters.
5 */
6
7
8#include <llvm/IR/Module.h>
9#include <llvm/Linker/Linker.h>
10#include <llvm/Support/PrettyStackTrace.h>
11#include <llvm/Support/Signals.h>
12#include <llvm/Support/ManagedStatic.h>
13#include <toolchain/toolchain.h>
14
15#include <IR_Gen/idisa_target.h>
16#include <boost/filesystem.hpp>
17#include <boost/iostreams/device/mapped_file.hpp>
18
19#include <lz4FrameDecoder.h>
20#include <cc/cc_compiler.h>
21#include <kernels/cc_kernel.h>
22#include <kernels/streamset.h>
23#include <kernels/s2p_kernel.h>
24#include <kernels/source_kernel.h>
25#include <kernels/stdout_kernel.h>
26#include <kernels/lz4/lz4_index_decoder.h>
27#include <kernels/lz4/lz4_bytestream_decoder.h>
28
29#include <kernels/kernel_builder.h>
30#include <toolchain/cpudriver.h>
31#include <iostream>
32namespace re { class CC; }
33
34using namespace llvm;
35using namespace parabix;
36using namespace kernel;
37
38static cl::OptionCategory lz4dFlags("Command Flags", "lz4d options");
39static cl::opt<std::string> inputFile(cl::Positional, cl::desc("<input file>"), cl::Required, cl::cat(lz4dFlags));
40static cl::opt<std::string> outputFile(cl::Positional, cl::desc("<output file>"), cl::Required, cl::cat(lz4dFlags));
41static cl::opt<bool> overwriteOutput("f", cl::desc("Overwrite existing output file."), cl::init(false), cl::cat(lz4dFlags));
42
43typedef void (*MainFunctionType)(char * byte_data, size_t filesize, bool hasBlockChecksum);
44
45void generatePipeline(ParabixDriver & pxDriver) {
46    auto & iBuilder = pxDriver.getBuilder();
47    Module * M = iBuilder->getModule();
48
49    Type * const sizeTy = iBuilder->getSizeTy();
50    Type * const boolTy = iBuilder->getIntNTy(sizeof(bool) * 8);
51    Type * const voidTy = iBuilder->getVoidTy();
52    Type * const inputType = iBuilder->getInt8PtrTy();
53   
54    Function * const main = cast<Function>(M->getOrInsertFunction("Main", voidTy, inputType, sizeTy, boolTy, nullptr));
55    main->setCallingConv(CallingConv::C);
56    Function::arg_iterator args = main->arg_begin();
57    Value * const inputStream = &*(args++);
58    inputStream->setName("input");
59    Value * const fileSize = &*(args++);
60    fileSize->setName("fileSize");
61    Value * const hasBlockChecksum = &*(args++);
62    hasBlockChecksum->setName("hasBlockChecksum");
63
64    const unsigned segmentSize = codegen::SegmentSize;
65    const unsigned bufferSegments = codegen::BufferSegments * codegen::ThreadNum;
66    // Output buffer should be at least one whole LZ4 block (4MB) large in case of uncompressed blocks.
67    // And the size (in bytes) also needs to be a power of two.
68    const unsigned decompressBufBlocks = (4 * 1024 * 1024) / codegen::BlockSize;
69
70    iBuilder->SetInsertPoint(BasicBlock::Create(M->getContext(), "entry", main, 0));
71
72    StreamSetBuffer * const ByteStream = pxDriver.addBuffer<SourceBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 8));
73    StreamSetBuffer * const BasisBits = pxDriver.addBuffer<CircularBuffer>(iBuilder, iBuilder->getStreamSetTy(8, 1), segmentSize * bufferSegments);
74    StreamSetBuffer * const Extenders = pxDriver.addBuffer<CircularBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 1), segmentSize * bufferSegments);
75    StreamSetBuffer * const LiteralIndexes = pxDriver.addBuffer<CircularBuffer>(iBuilder, iBuilder->getStreamSetTy(2, 32), segmentSize * bufferSegments);
76    StreamSetBuffer * const MatchIndexes = pxDriver.addBuffer<CircularBuffer>(iBuilder, iBuilder->getStreamSetTy(2, 32), segmentSize * bufferSegments);
77    StreamSetBuffer * const DecompressedByteStream = pxDriver.addBuffer<CircularBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 8), decompressBufBlocks);
78
79   
80    kernel::Kernel * sourceK = pxDriver.addKernelInstance<MemorySourceKernel>(iBuilder);
81    sourceK->setInitialArguments({inputStream, fileSize});
82    pxDriver.makeKernelCall(sourceK, {}, {ByteStream});
83
84    // Input stream is not aligned due to the offset.
85    Kernel * s2pk = pxDriver.addKernelInstance<S2PKernel>(iBuilder, /*aligned = */ false);
86    pxDriver.makeKernelCall(s2pk, {ByteStream}, {BasisBits});
87   
88    Kernel * extenderK = pxDriver.addKernelInstance<ParabixCharacterClassKernelBuilder>(iBuilder, "extenders", std::vector<re::CC *>{re::makeCC(0xFF)}, 8);
89    pxDriver.makeKernelCall(extenderK, {BasisBits}, {Extenders});
90
91    Kernel * lz4iK = pxDriver.addKernelInstance<LZ4IndexDecoderKernel>(iBuilder);
92    lz4iK->setInitialArguments({iBuilder->CreateTrunc(hasBlockChecksum, iBuilder->getInt1Ty())});
93    pxDriver.makeKernelCall(lz4iK, {ByteStream, Extenders}, {LiteralIndexes, MatchIndexes});
94
95    Kernel * lz4bK = pxDriver.addKernelInstance<LZ4ByteStreamDecoderKernel>(iBuilder, decompressBufBlocks * codegen::BlockSize);
96    pxDriver.makeKernelCall(lz4bK, {LiteralIndexes, MatchIndexes, ByteStream}, {DecompressedByteStream});
97
98    Kernel * outK = pxDriver.addKernelInstance<FileSink>(iBuilder, 8);
99    outK->setInitialArguments({iBuilder->GetString(outputFile)});
100    pxDriver.makeKernelCall(outK, {DecompressedByteStream}, {});
101 
102    pxDriver.generatePipelineIR();
103
104    pxDriver.deallocateBuffers();
105
106    iBuilder->CreateRetVoid();
107 
108    pxDriver.finalizeObject();
109}
110
111int main(int argc, char *argv[]) {
112    // This boilerplate provides convenient stack traces and clean LLVM exit
113    // handling. It also initializes the built in support for convenient
114    // command line option handling.
115    sys::PrintStackTraceOnErrorSignal(argv[0]);
116    llvm::PrettyStackTraceProgram X(argc, argv);
117    llvm_shutdown_obj shutdown;
118    codegen::ParseCommandLineOptions(argc, argv, {&lz4dFlags, codegen::codegen_flags()});
119    std::string fileName = inputFile;
120    LZ4FrameDecoder lz4Frame(fileName);
121    if (!lz4Frame.isValid()) {
122        errs() << "Invalid LZ4 file.\n";
123        return -1;
124    }
125
126    if (boost::filesystem::exists(outputFile)) {
127        if (overwriteOutput) {
128            boost::filesystem::remove(outputFile);
129        } else {
130            errs() << outputFile + " existed. Use -f argument to overwrite.\n";
131            return -1;
132        }
133    }
134
135    boost::iostreams::mapped_file_source mappedFile;
136    // Since mmap offset has to be multiples of pages, we can't use it to skip headers.
137    mappedFile.open(fileName, lz4Frame.getBlocksLength() + lz4Frame.getBlocksStart());
138    char *fileBuffer = const_cast<char *>(mappedFile.data()) + lz4Frame.getBlocksStart();
139    ParabixDriver pxDriver("lz4d");
140    generatePipeline(pxDriver);
141    auto main = reinterpret_cast<MainFunctionType>(pxDriver.getMain());
142
143    main(fileBuffer, lz4Frame.getBlocksLength(), lz4Frame.hasBlockChecksum());
144
145    mappedFile.close();
146    return 0;
147}
Note: See TracBrowser for help on using the repository browser.