source: icGREP/icgrep-devel/icgrep/lz4/LZ4Generator.cpp @ 6132

Last change on this file since 6132 was 6132, checked in by xwa163, 9 months ago
  1. More experiment on lz4 grep
  2. Improve performance of lzparabix grep
File size: 37.0 KB
Line 
1
2#include "LZ4Generator.h"
3
4#include <boost/filesystem.hpp>
5#include <boost/iostreams/device/mapped_file.hpp>
6
7
8#include <cc/cc_compiler.h>
9
10#include <kernels/cc_kernel.h>
11#include <kernels/s2p_kernel.h>
12#include <kernels/p2s_kernel.h>
13#include <kernels/source_kernel.h>
14#include <kernels/stdout_kernel.h>
15#include <kernels/lz4/lz4_generate_deposit_stream.h>
16#include <kernels/kernel_builder.h>
17#include <kernels/deletion.h>
18#include <kernels/swizzle.h>
19#include <kernels/pdep_kernel.h>
20#include <kernels/swizzled_multiple_pdep_kernel.h>
21#include <kernels/lz4/lz4_swizzled_match_copy_kernel.h>
22#include <kernels/lz4/lz4_bitstream_match_copy_kernel.h>
23#include <kernels/lz4/lz4_block_decoder.h>
24#include <kernels/lz4/lz4_index_builder.h>
25#include <kernels/lz4/aio/lz4_bytestream_aio.h>
26#include <kernels/lz4/aio/lz4_parallel_bytestream_aio.h>
27#include <kernels/lz4/aio/lz4_swizzled_aio.h>
28#include <kernels/lz4/aio/lz4_bitstream_aio.h>
29#include <kernels/lz4/aio/lz4_i4_bytestream_aio.h>
30#include <kernels/bitstream_pdep_kernel.h>
31#include <kernels/lz4/lz4_bitstream_not_kernel.h>
32
33namespace re { class CC; }
34
35using namespace llvm;
36using namespace parabix;
37using namespace kernel;
38
39LZ4Generator::LZ4Generator():mPxDriver("lz4d"), mLz4BlockSize(4 * 1024 * 1024) {
40    mCompressionMarker = NULL;
41}
42
43MainFunctionType LZ4Generator::getMainFunc() {
44    return reinterpret_cast<MainFunctionType>(mPxDriver.getMain());
45}
46
47void LZ4Generator::generateNewExtractOnlyPipeline(const std::string &outputFile) {
48    auto & iBuilder = mPxDriver.getBuilder();
49    this->generateMainFunc(iBuilder);
50
51    StreamSetBuffer * const DecompressedByteStream = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 8), this->getDecompressedBufferBlocks(iBuilder));
52
53    // GeneratePipeline
54    this->generateLoadByteStreamAndBitStream(iBuilder);
55
56    //// Decode Block Information
57    StreamSetBuffer * const BlockData_IsCompressed = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 8), this->getInputBufferBlocks(iBuilder), 1);
58    StreamSetBuffer * const BlockData_BlockStart = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 64), this->getInputBufferBlocks(iBuilder), 1);
59    StreamSetBuffer * const BlockData_BlockEnd = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 64), this->getInputBufferBlocks(iBuilder), 1);
60
61    //// Generate Helper Markers Extenders, FX, XF
62    StreamSetBuffer * const Extenders = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 1), this->getInputBufferBlocks(iBuilder), 1);
63    mMatchOffsetMarker = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 1), this->getInputBufferBlocks(iBuilder));
64    Kernel * extenderK = mPxDriver.addKernelInstance<ParabixCharacterClassKernelBuilder>(iBuilder, "extenders", std::vector<re::CC *>{re::makeCC(0xFF)}, 8);
65    mPxDriver.makeKernelCall(extenderK, {mCompressedBasisBits}, {Extenders});
66
67
68    Kernel * blockDecoderK = mPxDriver.addKernelInstance<LZ4BlockDecoderKernel>(iBuilder);
69    blockDecoderK->setInitialArguments({iBuilder->CreateTrunc(mHasBlockChecksum, iBuilder->getInt1Ty()), mHeaderSize, mFileSize});
70    mPxDriver.makeKernelCall(blockDecoderK, {mCompressedByteStream}, {BlockData_IsCompressed, BlockData_BlockStart, BlockData_BlockEnd});
71
72    //// Generate Extract/Deposit Markers, M0_Start, M0_End, MatchOffset
73    //TODO handle uncompressed part
74    StreamSetBuffer * const UncompressedStartPos = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 64), this->getInputBufferBlocks(iBuilder), 1);
75    StreamSetBuffer * const UncompressedLength = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 64), this->getInputBufferBlocks(iBuilder), 1);
76    StreamSetBuffer * const UncompressedOutputPos = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 64), this->getInputBufferBlocks(iBuilder), 1);
77
78    mCompressionMarker = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 1), this->getInputBufferBlocks(iBuilder));
79    mM0Marker = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 1), this->getDecompressedBufferBlocks(iBuilder));
80    mDepositMarker = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 1), this->getDecompressedBufferBlocks(iBuilder));
81
82    Kernel* Lz4IndexBuilderK = mPxDriver.addKernelInstance<LZ4IndexBuilderKernel>(iBuilder);
83    Lz4IndexBuilderK->setInitialArguments({mFileSize});
84    mPxDriver.makeKernelCall(
85            Lz4IndexBuilderK,
86            {
87                    mCompressedByteStream,
88                    Extenders,
89
90                    // Block Data
91                    BlockData_IsCompressed,
92                    BlockData_BlockStart,
93                    BlockData_BlockEnd
94            }, {
95                    //Uncompressed Data
96                    UncompressedStartPos,
97                    UncompressedLength,
98                    UncompressedOutputPos,
99
100                    mCompressionMarker,
101                    mM0Marker,
102                    mMatchOffsetMarker
103            });
104
105    Kernel * generateDepositK = mPxDriver.addKernelInstance<LZ4GenerateDepositStreamKernel>(iBuilder);
106    mPxDriver.makeKernelCall(generateDepositK, {mM0Marker}, {mDepositMarker});
107
108
109    // Deletion
110    StreamSetBuffer * deletedBits = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(8), this->getInputBufferBlocks(iBuilder));
111    StreamSetBuffer * deletionCounts = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(), this->getInputBufferBlocks(iBuilder));
112
113    Kernel * delK = mPxDriver.addKernelInstance<PEXTFieldCompressKernel>(iBuilder, 64, 8);
114    mPxDriver.makeKernelCall(delK, {mCompressedBasisBits, mCompressionMarker}, {deletedBits, deletionCounts});
115
116    StreamSetBuffer * compressedBits = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(8), this->getInputBufferBlocks(iBuilder));
117    Kernel * streamCompressionK = mPxDriver.addKernelInstance<StreamCompressKernel>(iBuilder, 64, 8);
118    mPxDriver.makeKernelCall(streamCompressionK, {deletedBits, deletionCounts}, {compressedBits});
119
120
121    StreamSetBuffer * const extractedBits = compressedBits;
122
123    Kernel * p2sK = mPxDriver.addKernelInstance<P2SKernel>(iBuilder);
124    mPxDriver.makeKernelCall(p2sK, {extractedBits}, {DecompressedByteStream});
125
126    // --------------------------------------------------------
127    // End
128    Kernel * outK = mPxDriver.addKernelInstance<FileSink>(iBuilder, 8);
129
130    outK->setInitialArguments({iBuilder->GetString(outputFile)});
131    mPxDriver.makeKernelCall(outK, {DecompressedByteStream}, {});
132
133    mPxDriver.generatePipelineIR();
134    mPxDriver.deallocateBuffers();
135
136    iBuilder->CreateRetVoid();
137
138    mPxDriver.finalizeObject();
139}
140
141void LZ4Generator::generateExtractOnlyPipeline(const std::string &outputFile) {
142    auto & iBuilder = mPxDriver.getBuilder();
143    this->generateMainFunc(iBuilder);
144
145    StreamSetBuffer * const DecompressedByteStream = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 8), this->getDecompressedBufferBlocks(iBuilder));
146
147    // GeneratePipeline
148    this->generateLoadByteStreamAndBitStream(iBuilder);
149    this->generateExtractAndDepositMarkers(iBuilder);
150    StreamSetBuffer * const extractedBits = this->generateBitStreamExtractData(iBuilder);
151
152    Kernel * p2sK = mPxDriver.addKernelInstance<P2SKernel>(iBuilder);
153    mPxDriver.makeKernelCall(p2sK, {extractedBits}, {DecompressedByteStream});
154
155    // --------------------------------------------------------
156    // End
157    Kernel * outK = mPxDriver.addKernelInstance<FileSink>(iBuilder, 8);
158
159    outK->setInitialArguments({iBuilder->GetString(outputFile)});
160    mPxDriver.makeKernelCall(outK, {DecompressedByteStream}, {});
161
162    mPxDriver.generatePipelineIR();
163    mPxDriver.deallocateBuffers();
164
165    iBuilder->CreateRetVoid();
166
167    mPxDriver.finalizeObject();
168}
169
170void LZ4Generator::generateSwizzledExtractOnlyPipeline(const std::string &outputFile) {
171    auto & iBuilder = mPxDriver.getBuilder();
172    this->generateMainFunc(iBuilder);
173
174    StreamSetBuffer * const DecompressedByteStream = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 8), this->getDecompressedBufferBlocks(iBuilder));
175
176    // GeneratePipeline
177    this->generateLoadByteStreamAndBitStream(iBuilder);
178    this->generateExtractAndDepositMarkers(iBuilder);
179    auto swizzle = this->generateSwizzleExtractData(iBuilder);
180
181
182    // Produce unswizzled bit streams
183    StreamSetBuffer * extractedbits = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(8), this->getInputBufferBlocks(iBuilder));
184    Kernel * unSwizzleK = mPxDriver.addKernelInstance<SwizzleGenerator>(iBuilder, 8, 1, 2);
185
186    mPxDriver.makeKernelCall(unSwizzleK, {swizzle.first, swizzle.second}, {extractedbits});
187
188
189    Kernel * p2sK = mPxDriver.addKernelInstance<P2SKernel>(iBuilder);
190    mPxDriver.makeKernelCall(p2sK, {extractedbits}, {DecompressedByteStream});
191
192    // --------------------------------------------------------
193    // End
194    Kernel * outK = mPxDriver.addKernelInstance<FileSink>(iBuilder, 8);
195
196    outK->setInitialArguments({iBuilder->GetString(outputFile)});
197    mPxDriver.makeKernelCall(outK, {DecompressedByteStream}, {});
198
199    mPxDriver.generatePipelineIR();
200    mPxDriver.deallocateBuffers();
201
202    iBuilder->CreateRetVoid();
203
204    mPxDriver.finalizeObject();
205}
206
207void LZ4Generator::generateExtractAndDepositOnlyPipeline(const std::string &outputFile) {
208    auto & iBuilder = mPxDriver.getBuilder();
209    this->generateMainFunc(iBuilder);
210
211    StreamSetBuffer * const DecompressedByteStream = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 8), this->getDecompressedBufferBlocks(iBuilder));
212
213    // GeneratePipeline
214    this->generateLoadByteStreamAndBitStream(iBuilder);
215    this->generateExtractAndDepositMarkers(iBuilder);
216    StreamSetBuffer * const extractedBits = this->generateBitStreamExtractData(iBuilder);
217
218    StreamSetBuffer * depositedBits = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(8), this->getDecompressedBufferBlocks(iBuilder));
219    Kernel * bitStreamPDEPk = mPxDriver.addKernelInstance<BitStreamPDEPKernel>(iBuilder, 8);
220    mPxDriver.makeKernelCall(bitStreamPDEPk, {mDepositMarker, extractedBits}, {depositedBits});
221
222    Kernel * p2sK = mPxDriver.addKernelInstance<P2SKernel>(iBuilder);
223    mPxDriver.makeKernelCall(p2sK, {depositedBits}, {DecompressedByteStream});
224
225    // --------------------------------------------------------
226    // End
227    Kernel * outK = mPxDriver.addKernelInstance<FileSink>(iBuilder, 8);
228
229    outK->setInitialArguments({iBuilder->GetString(outputFile)});
230    mPxDriver.makeKernelCall(outK, {DecompressedByteStream}, {});
231
232    mPxDriver.generatePipelineIR();
233    mPxDriver.deallocateBuffers();
234
235    iBuilder->CreateRetVoid();
236
237    mPxDriver.finalizeObject();
238}
239
240void LZ4Generator::generateSwizzledExtractAndDepositOnlyPipeline(const std::string &outputFile) {
241    auto & iBuilder = mPxDriver.getBuilder();
242    this->generateMainFunc(iBuilder);
243
244    StreamSetBuffer * const DecompressedByteStream = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 8), this->getDecompressedBufferBlocks(iBuilder));
245
246    // GeneratePipeline
247    this->generateLoadByteStreamAndBitStream(iBuilder);
248    this->generateExtractAndDepositMarkers(iBuilder);
249
250    auto swizzle = this->generateSwizzleExtractData(iBuilder);
251
252    StreamSetBuffer * depositedSwizzle0 = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(4), this->getInputBufferBlocks(iBuilder), 1);
253    StreamSetBuffer * depositedSwizzle1 = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(4), this->getInputBufferBlocks(iBuilder), 1);
254
255    Kernel * multiplePdepK = mPxDriver.addKernelInstance<SwizzledMultiplePDEPkernel>(iBuilder, 4, 2);
256    mPxDriver.makeKernelCall(multiplePdepK, {mDepositMarker, swizzle.first, swizzle.second}, {depositedSwizzle0, depositedSwizzle1});
257
258    // Produce unswizzled bit streams
259    StreamSetBuffer * depositedBits = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(8), this->getInputBufferBlocks(iBuilder));
260    Kernel * unSwizzleK = mPxDriver.addKernelInstance<SwizzleGenerator>(iBuilder, 8, 1, 2);
261    mPxDriver.makeKernelCall(unSwizzleK, {depositedSwizzle0, depositedSwizzle1}, {depositedBits});
262
263    Kernel * p2sK = mPxDriver.addKernelInstance<P2SKernel>(iBuilder);
264    mPxDriver.makeKernelCall(p2sK, {depositedBits}, {DecompressedByteStream});
265
266    // --------------------------------------------------------
267    // End
268    Kernel * outK = mPxDriver.addKernelInstance<FileSink>(iBuilder, 8);
269    outK->setInitialArguments({iBuilder->GetString(outputFile)});
270    mPxDriver.makeKernelCall(outK, {DecompressedByteStream}, {});
271
272    mPxDriver.generatePipelineIR();
273    mPxDriver.deallocateBuffers();
274
275    iBuilder->CreateRetVoid();
276
277    mPxDriver.finalizeObject();
278}
279
280void LZ4Generator::generatePipeline(const std::string &outputFile) {
281    auto & iBuilder = mPxDriver.getBuilder();
282    this->generateMainFunc(iBuilder);
283
284    StreamSetBuffer * const DecompressedByteStream = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 8), this->getDecompressedBufferBlocks(iBuilder));
285
286    // GeneratePipeline
287    this->generateLoadByteStreamAndBitStream(iBuilder);
288    this->generateExtractAndDepositMarkers(iBuilder);
289    StreamSetBuffer * const extractedBits = this->generateBitStreamExtractData(iBuilder);
290
291    StreamSetBuffer * depositedBits = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(8), this->getDecompressedBufferBlocks(iBuilder));
292    Kernel * bitStreamPDEPk = mPxDriver.addKernelInstance<BitStreamPDEPKernel>(iBuilder, 8);
293    mPxDriver.makeKernelCall(bitStreamPDEPk, {mDepositMarker, extractedBits}, {depositedBits});
294
295    StreamSetBuffer * matchCopiedBits = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(8), this->getInputBufferBlocks(iBuilder));
296    Kernel * bitStreamMatchCopyK = mPxDriver.addKernelInstance<LZ4BitStreamMatchCopyKernel>(iBuilder, 8);
297    mPxDriver.makeKernelCall(bitStreamMatchCopyK, {mMatchOffsetMarker, mM0Marker, mCompressedByteStream, depositedBits}, {matchCopiedBits});
298
299    Kernel * p2sK = mPxDriver.addKernelInstance<P2SKernel>(iBuilder);
300    mPxDriver.makeKernelCall(p2sK, {matchCopiedBits}, {DecompressedByteStream});
301
302    // --------------------------------------------------------
303    // End
304    Kernel * outK = mPxDriver.addKernelInstance<FileSink>(iBuilder, 8);
305
306    outK->setInitialArguments({iBuilder->GetString(outputFile)});
307    mPxDriver.makeKernelCall(outK, {DecompressedByteStream}, {});
308
309    mPxDriver.generatePipelineIR();
310    mPxDriver.deallocateBuffers();
311
312    iBuilder->CreateRetVoid();
313
314    mPxDriver.finalizeObject();
315}
316
317void LZ4Generator::generateSwizzledPipeline(const std::string &outputFile) {
318    auto & iBuilder = mPxDriver.getBuilder();
319    this->generateMainFunc(iBuilder);
320
321    StreamSetBuffer * const DecompressedByteStream = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 8), this->getDecompressedBufferBlocks(iBuilder));
322
323    // GeneratePipeline
324    this->generateLoadByteStreamAndBitStream(iBuilder);
325    this->generateExtractAndDepositMarkers(iBuilder);
326
327    auto swizzle = this->generateSwizzleExtractData(iBuilder);
328
329    StreamSetBuffer * depositedSwizzle0 = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(4), this->getDecompressedBufferBlocks(iBuilder), 1);
330    StreamSetBuffer * depositedSwizzle1 = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(4), this->getDecompressedBufferBlocks(iBuilder), 1);
331
332    Kernel * multiplePdepK = mPxDriver.addKernelInstance<SwizzledMultiplePDEPkernel>(iBuilder, 4, 2);
333    mPxDriver.makeKernelCall(multiplePdepK, {mDepositMarker, swizzle.first, swizzle.second}, {depositedSwizzle0, depositedSwizzle1});
334
335    StreamSetBuffer * matchCopiedSwizzle0 = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(4), this->getDecompressedBufferBlocks(iBuilder), 1);
336    StreamSetBuffer * matchCopiedSwizzle1 = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(4), this->getDecompressedBufferBlocks(iBuilder), 1);
337
338    Kernel * swizzledMatchCopyK = mPxDriver.addKernelInstance<LZ4SwizzledMatchCopyKernel>(iBuilder, 4, 2, 4);
339    mPxDriver.makeKernelCall(swizzledMatchCopyK, {mMatchOffsetMarker, mM0Marker, mCompressedByteStream, depositedSwizzle0, depositedSwizzle1}, {matchCopiedSwizzle0, matchCopiedSwizzle1});
340
341
342    // Produce unswizzled bit streams
343    StreamSetBuffer * matchCopiedBits = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(8), this->getInputBufferBlocks(iBuilder));
344    Kernel * unSwizzleK = mPxDriver.addKernelInstance<SwizzleGenerator>(iBuilder, 8, 1, 2);
345    mPxDriver.makeKernelCall(unSwizzleK, {matchCopiedSwizzle0, matchCopiedSwizzle1}, {matchCopiedBits});
346
347
348    Kernel * p2sK = mPxDriver.addKernelInstance<P2SKernel>(iBuilder);
349    mPxDriver.makeKernelCall(p2sK, {matchCopiedBits}, {DecompressedByteStream});
350
351    // --------------------------------------------------------
352    // End
353    Kernel * outK = mPxDriver.addKernelInstance<FileSink>(iBuilder, 8);
354    outK->setInitialArguments({iBuilder->GetString(outputFile)});
355    mPxDriver.makeKernelCall(outK, {DecompressedByteStream}, {});
356
357    mPxDriver.generatePipelineIR();
358    mPxDriver.deallocateBuffers();
359
360    iBuilder->CreateRetVoid();
361
362    mPxDriver.finalizeObject();
363}
364
365void LZ4Generator::generateMainFunc(const std::unique_ptr<kernel::KernelBuilder> & iBuilder) {
366    Module * M = iBuilder->getModule();
367    Type * const sizeTy = iBuilder->getSizeTy();
368    Type * const boolTy = iBuilder->getIntNTy(sizeof(bool) * 8);
369    Type * const voidTy = iBuilder->getVoidTy();
370    Type * const inputType = iBuilder->getInt8PtrTy();
371
372    Function * const main = cast<Function>(M->getOrInsertFunction("Main", voidTy, inputType, sizeTy, sizeTy, boolTy, nullptr));
373    main->setCallingConv(CallingConv::C);
374    Function::arg_iterator args = main->arg_begin();
375    mInputStream = &*(args++);
376    mInputStream->setName("input");
377
378    mHeaderSize = &*(args++);
379    mHeaderSize->setName("mHeaderSize");
380
381    mFileSize = &*(args++);
382    mFileSize->setName("mFileSize");
383
384    mHasBlockChecksum = &*(args++);
385    mHasBlockChecksum->setName("mHasBlockChecksum");
386    // TODO for now, we do not handle blockCheckSum
387    mHasBlockChecksum = iBuilder->getInt1(false);
388
389    iBuilder->SetInsertPoint(BasicBlock::Create(M->getContext(), "entry", main, 0));
390}
391
392void LZ4Generator::generateLoadByteStream(const std::unique_ptr<kernel::KernelBuilder> & iBuilder) {
393    mCompressedByteStream = mPxDriver.addBuffer<ExternalBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 8));
394    kernel::Kernel * sourceK = mPxDriver.addKernelInstance<MemorySourceKernel>(iBuilder);
395    sourceK->setInitialArguments({mInputStream, mFileSize});
396    mPxDriver.makeKernelCall(sourceK, {}, {mCompressedByteStream});
397}
398void LZ4Generator::generateLoadByteStreamAndBitStream(const std::unique_ptr<kernel::KernelBuilder> & iBuilder) {
399    mCompressedByteStream = mPxDriver.addBuffer<ExternalBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 8));
400    mCompressedBasisBits = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(8, 1), this->getInputBufferBlocks(iBuilder));
401
402    kernel::Kernel * sourceK = mPxDriver.addKernelInstance<MemorySourceKernel>(iBuilder);
403    sourceK->setInitialArguments({mInputStream, mFileSize});
404    mPxDriver.makeKernelCall(sourceK, {}, {mCompressedByteStream});
405    Kernel * s2pk = mPxDriver.addKernelInstance<S2PKernel>(iBuilder, cc::BitNumbering::BigEndian);
406    mPxDriver.makeKernelCall(s2pk, {mCompressedByteStream}, {mCompressedBasisBits});
407}
408
409StreamSetBuffer * LZ4Generator::generateBitStreamAIODecompression(const std::unique_ptr<kernel::KernelBuilder> & iBuilder) {
410    return this->convertCompressedBitsStreamWithBitStreamAioApproach({mCompressedBasisBits}, "combined")[0];
411}
412
413std::vector<StreamSetBuffer*> LZ4Generator::convertCompressedBitsStreamWithBitStreamAioApproach(
414        std::vector<StreamSetBuffer*> compressedBitStreams, std::string prefix) {
415    auto mGrepDriver = &mPxDriver;
416    auto & iBuilder = mGrepDriver->getBuilder();
417
418    //// Decode Block Information
419    StreamSetBuffer * const BlockData_IsCompressed = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 8), this->getInputBufferBlocks(iBuilder), 1);
420    StreamSetBuffer * const BlockData_BlockStart = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 64), this->getInputBufferBlocks(iBuilder), 1);
421    StreamSetBuffer * const BlockData_BlockEnd = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 64), this->getInputBufferBlocks(iBuilder), 1);
422
423
424    Kernel * blockDecoderK = mPxDriver.addKernelInstance<LZ4BlockDecoderKernel>(iBuilder);
425    blockDecoderK->setInitialArguments({iBuilder->CreateTrunc(mHasBlockChecksum, iBuilder->getInt1Ty()), mHeaderSize, mFileSize});
426    mPxDriver.makeKernelCall(blockDecoderK, {mCompressedByteStream}, {BlockData_IsCompressed, BlockData_BlockStart, BlockData_BlockEnd});
427
428
429
430    if (compressedBitStreams[0]->getNumOfStreams() == 4) {
431        StreamSetBuffer* twistedCharClasses = mGrepDriver->addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 4), this->getInputBufferBlocks(iBuilder));
432        kernel::Kernel* twistK = mGrepDriver->addKernelInstance<kernel::P2S4StreamByPDEP>(iBuilder);
433        mGrepDriver->makeKernelCall(twistK, {compressedBitStreams[0]}, {twistedCharClasses});
434
435
436        StreamSetBuffer* uncompressedTwistedCharClasses = mGrepDriver->addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 4), this->getInputBufferBlocks(iBuilder));
437        Kernel* lz4I4AioK = mPxDriver.addKernelInstance<LZ4I4ByteStreamAioKernel>(iBuilder);
438        lz4I4AioK->setInitialArguments({mFileSize});
439        mGrepDriver->makeKernelCall(lz4I4AioK, {
440                mCompressedByteStream,
441
442                // Block Data
443                BlockData_IsCompressed,
444                BlockData_BlockStart,
445                BlockData_BlockEnd,
446
447                twistedCharClasses
448        }, {
449                                            uncompressedTwistedCharClasses
450                                    });
451
452        StreamSetBuffer* untwistedCharClasses = mGrepDriver->addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(4), this->getInputBufferBlocks(iBuilder));
453        kernel::Kernel* untwistK = mGrepDriver->addKernelInstance<kernel::S2P4StreamByPEXTKernel>(iBuilder);
454        mGrepDriver->makeKernelCall(untwistK, {uncompressedTwistedCharClasses}, {untwistedCharClasses});
455        return {untwistedCharClasses};
456    }
457
458
459
460
461    std::vector<StreamSetBuffer *> inputStreams = {
462            mCompressedByteStream,
463
464            // Block Data
465            BlockData_IsCompressed,
466            BlockData_BlockStart,
467            BlockData_BlockEnd
468    };
469
470    std::vector<StreamSetBuffer *> outputStream;
471    std::vector<unsigned> numbersOfStreams;
472
473    for (unsigned i = 0; i < compressedBitStreams.size(); i++) {
474        unsigned numOfStreams = compressedBitStreams[i]->getNumOfStreams();
475        numbersOfStreams.push_back(numOfStreams);
476        inputStreams.push_back(compressedBitStreams[i]);
477        outputStream.push_back(mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(numOfStreams, 1), this->getInputBufferBlocks(iBuilder)));
478    }
479
480    Kernel* lz4AioK = mPxDriver.addKernelInstance<LZ4BitStreamAioKernel>(iBuilder, numbersOfStreams);
481    lz4AioK->setInitialArguments({mFileSize});
482    mPxDriver.makeKernelCall(lz4AioK, inputStreams, outputStream);
483
484    return outputStream;
485}
486
487
488StreamSetBuffer * LZ4Generator::generateSwizzledAIODecompression(const std::unique_ptr<kernel::KernelBuilder> & iBuilder) {
489    //// Decode Block Information
490    StreamSetBuffer * const BlockData_IsCompressed = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 8), this->getInputBufferBlocks(iBuilder), 1);
491    StreamSetBuffer * const BlockData_BlockStart = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 64), this->getInputBufferBlocks(iBuilder), 1);
492    StreamSetBuffer * const BlockData_BlockEnd = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 64), this->getInputBufferBlocks(iBuilder), 1);
493
494    //// Generate Helper Markers Extenders, FX, XF
495//    StreamSetBuffer * const Extenders = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 1), this->getInputBufferBlocks(iBuilder), 1);
496//    mMatchOffsetMarker = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 1), this->getInputBufferBlocks(iBuilder));
497//    Kernel * extenderK = mPxDriver.addKernelInstance<ParabixCharacterClassKernelBuilder>(iBuilder, "extenders", std::vector<re::CC *>{re::makeCC(0xFF)}, 8);
498//    mPxDriver.makeKernelCall(extenderK, {mCompressedBasisBits}, {Extenders});
499
500
501    Kernel * blockDecoderK = mPxDriver.addKernelInstance<LZ4BlockDecoderKernel>(iBuilder);
502    blockDecoderK->setInitialArguments({iBuilder->CreateTrunc(mHasBlockChecksum, iBuilder->getInt1Ty()), mHeaderSize, mFileSize});
503    mPxDriver.makeKernelCall(blockDecoderK, {mCompressedByteStream}, {BlockData_IsCompressed, BlockData_BlockStart, BlockData_BlockEnd});
504
505
506    // Produce unswizzled bit streams
507    StreamSetBuffer * u16Swizzle0 = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(4), this->getInputBufferBlocks(iBuilder), 1);
508    StreamSetBuffer * u16Swizzle1 = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(4), this->getInputBufferBlocks(iBuilder), 1);
509    Kernel * unSwizzleK = mPxDriver.addKernelInstance<SwizzleGenerator>(iBuilder, 8, 2, 1, 64, "source");
510    mPxDriver.makeKernelCall(unSwizzleK, {mCompressedBasisBits}, {u16Swizzle0, u16Swizzle1});
511
512
513
514    StreamSetBuffer * decompressedSwizzled0 = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(4), this->getInputBufferBlocks(iBuilder), 1);
515    StreamSetBuffer * decompressedSwizzled1 = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(4), this->getInputBufferBlocks(iBuilder), 1);
516
517
518    Kernel* lz4AioK = mPxDriver.addKernelInstance<LZ4SwizzledAioKernel>(iBuilder, 4, 2, 4);
519    lz4AioK->setInitialArguments({mFileSize});
520    mPxDriver.makeKernelCall(
521            lz4AioK,
522            {
523                    mCompressedByteStream,
524
525//                    Extenders,
526
527                    // Block Data
528                    BlockData_IsCompressed,
529                    BlockData_BlockStart,
530                    BlockData_BlockEnd,
531
532                    u16Swizzle0,
533                    u16Swizzle1
534            }, {
535                    decompressedSwizzled0,
536                    decompressedSwizzled1
537            });
538
539
540    StreamSetBuffer * const decompressionBitStream = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(8, 1), this->getDecompressedBufferBlocks(iBuilder));
541
542    Kernel * unSwizzleK2 = mPxDriver.addKernelInstance<SwizzleGenerator>(iBuilder, 8, 1, 2);
543    mPxDriver.makeKernelCall(unSwizzleK2, {decompressedSwizzled0, decompressedSwizzled1}, {decompressionBitStream});
544
545    return decompressionBitStream;
546}
547
548parabix::StreamSetBuffer * LZ4Generator::generateParallelAIODecompression(const std::unique_ptr<kernel::KernelBuilder> & iBuilder, bool enableGather, bool enableScatter, int minParallelLevel) {
549    //// Decode Block Information
550    StreamSetBuffer * const BlockData_IsCompressed = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 8), this->getInputBufferBlocks(iBuilder), 1);
551    StreamSetBuffer * const BlockData_BlockStart = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 64), this->getInputBufferBlocks(iBuilder), 1);
552    StreamSetBuffer * const BlockData_BlockEnd = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 64), this->getInputBufferBlocks(iBuilder), 1);
553
554    //// Generate Helper Markers Extenders
555//    StreamSetBuffer * const Extenders = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 1), this->getInputBufferBlocks(iBuilder), 1);
556//    mMatchOffsetMarker = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 1), this->getInputBufferBlocks(iBuilder));
557//    Kernel * extenderK = mPxDriver.addKernelInstance<ParabixCharacterClassKernelBuilder>(iBuilder, "extenders", std::vector<re::CC *>{re::makeCC(0xFF)}, 8);
558//    mPxDriver.makeKernelCall(extenderK, {mCompressedBasisBits}, {Extenders});
559
560    Kernel * blockDecoderK = mPxDriver.addKernelInstance<LZ4BlockDecoderKernel>(iBuilder);
561    blockDecoderK->setInitialArguments({iBuilder->CreateTrunc(mHasBlockChecksum, iBuilder->getInt1Ty()), mHeaderSize, mFileSize});
562    mPxDriver.makeKernelCall(blockDecoderK, {mCompressedByteStream}, {BlockData_IsCompressed, BlockData_BlockStart, BlockData_BlockEnd});
563
564
565    StreamSetBuffer * const decompressionByteStream = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 8), this->getDecompressedBufferBlocks(iBuilder), 1);
566
567    Kernel* lz4AioK = mPxDriver.addKernelInstance<LZ4ParallelByteStreamAioKernel>(iBuilder, mLz4BlockSize, enableGather, enableScatter, minParallelLevel);
568    lz4AioK->setInitialArguments({mFileSize});
569    mPxDriver.makeKernelCall(
570            lz4AioK,
571            {
572                    mCompressedByteStream,
573
574//                    Extenders,
575
576                    // Block Data
577                    BlockData_IsCompressed,
578                    BlockData_BlockStart,
579                    BlockData_BlockEnd
580            }, {
581                    decompressionByteStream
582            });
583
584    return decompressionByteStream;
585
586}
587
588StreamSetBuffer * LZ4Generator::generateAIODecompression(const std::unique_ptr<kernel::KernelBuilder> & iBuilder) {
589    LZ4BlockInfo blockInfo = this->getBlockInfo(iBuilder);
590
591    StreamSetBuffer * const decompressionByteStream = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 8), this->getDecompressedBufferBlocks(iBuilder), 1);
592    Kernel* lz4AioK = mPxDriver.addKernelInstance<LZ4ByteStreamAioKernel>(iBuilder);
593    lz4AioK->setInitialArguments({mFileSize});
594    mPxDriver.makeKernelCall(
595            lz4AioK,
596            {
597                    mCompressedByteStream,
598
599                    // Block Data
600                    blockInfo.isCompress,
601                    blockInfo.blockStart,
602                    blockInfo.blockEnd
603            }, {
604                    decompressionByteStream
605            });
606
607    return decompressionByteStream;
608}
609
610void LZ4Generator::generateExtractAndDepositMarkers(const std::unique_ptr<kernel::KernelBuilder> & iBuilder) {
611    //// Decode Block Information
612    StreamSetBuffer * const BlockData_IsCompressed = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 8), this->getInputBufferBlocks(iBuilder), 1);
613    StreamSetBuffer * const BlockData_BlockStart = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 64), this->getInputBufferBlocks(iBuilder), 1);
614    StreamSetBuffer * const BlockData_BlockEnd = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 64), this->getInputBufferBlocks(iBuilder), 1);
615
616    //// Generate Helper Markers Extenders, FX, XF
617    StreamSetBuffer * const Extenders = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 1), this->getInputBufferBlocks(iBuilder), 1);
618    mMatchOffsetMarker = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 1), this->getInputBufferBlocks(iBuilder));
619    Kernel * extenderK = mPxDriver.addKernelInstance<ParabixCharacterClassKernelBuilder>(iBuilder, "extenders", std::vector<re::CC *>{re::makeCC(0xFF)}, 8);
620    mPxDriver.makeKernelCall(extenderK, {mCompressedBasisBits}, {Extenders});
621
622
623    Kernel * blockDecoderK = mPxDriver.addKernelInstance<LZ4BlockDecoderKernel>(iBuilder);
624    blockDecoderK->setInitialArguments({iBuilder->CreateTrunc(mHasBlockChecksum, iBuilder->getInt1Ty()), mHeaderSize, mFileSize});
625    mPxDriver.makeKernelCall(blockDecoderK, {mCompressedByteStream}, {BlockData_IsCompressed, BlockData_BlockStart, BlockData_BlockEnd});
626
627    //// Generate Extract/Deposit Markers, M0_Start, M0_End, MatchOffset
628
629    //TODO handle uncompressed part
630    StreamSetBuffer * const UncompressedStartPos = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 64), this->getInputBufferBlocks(iBuilder), 1);
631    StreamSetBuffer * const UncompressedLength = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 64), this->getInputBufferBlocks(iBuilder), 1);
632    StreamSetBuffer * const UncompressedOutputPos = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 64), this->getInputBufferBlocks(iBuilder), 1);
633
634    mDeletionMarker = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 1), this->getInputBufferBlocks(iBuilder));
635    mM0Marker = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 1), this->getDecompressedBufferBlocks(iBuilder));
636    mDepositMarker = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 1), this->getDecompressedBufferBlocks(iBuilder));
637
638    Kernel* Lz4IndexBuilderK = mPxDriver.addKernelInstance<LZ4IndexBuilderKernel>(iBuilder);
639    Lz4IndexBuilderK->setInitialArguments({mFileSize});
640    mPxDriver.makeKernelCall(
641            Lz4IndexBuilderK,
642            {
643                    mCompressedByteStream,
644                    Extenders,
645
646                    // Block Data
647                    BlockData_IsCompressed,
648                    BlockData_BlockStart,
649                    BlockData_BlockEnd
650            }, {
651                    //Uncompressed Data
652                    UncompressedStartPos,
653                    UncompressedLength,
654                    UncompressedOutputPos,
655
656                    mDeletionMarker,
657                    mM0Marker,
658                    mMatchOffsetMarker
659            });
660
661    Kernel * generateDepositK = mPxDriver.addKernelInstance<LZ4GenerateDepositStreamKernel>(iBuilder);
662    mPxDriver.makeKernelCall(generateDepositK, {mM0Marker}, {mDepositMarker});
663
664}
665
666std::pair<StreamSetBuffer*, StreamSetBuffer*> LZ4Generator::generateSwizzleExtractData(const std::unique_ptr<kernel::KernelBuilder> & iBuilder) {
667    StreamSetBuffer * u16Swizzle0 = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(4), this->getInputBufferBlocks(iBuilder), 1);
668    StreamSetBuffer * u16Swizzle1 = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(4), this->getInputBufferBlocks(iBuilder), 1);
669
670    Kernel * delK = mPxDriver.addKernelInstance<SwizzledDeleteByPEXTkernel>(iBuilder, 8, 64);
671    mPxDriver.makeKernelCall(delK, {mDeletionMarker, mCompressedBasisBits}, {u16Swizzle0, u16Swizzle1});
672    return std::make_pair(u16Swizzle0, u16Swizzle1);
673}
674
675void LZ4Generator::generateCompressionMarker(const std::unique_ptr<kernel::KernelBuilder> & iBuilder) {
676    if (!mCompressionMarker) {
677        mCompressionMarker = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 1), this->getInputBufferBlocks(iBuilder));
678        Kernel * bitstreamNotK = mPxDriver.addKernelInstance<LZ4BitStreamNotKernel>(iBuilder);
679        mPxDriver.makeKernelCall(bitstreamNotK, {mDeletionMarker}, {mCompressionMarker});
680    }
681}
682
683parabix::StreamSetBuffer* LZ4Generator::generateBitStreamExtractData(const std::unique_ptr<kernel::KernelBuilder> & iBuilder) {
684    this->generateCompressionMarker(iBuilder);
685
686    // Deletion
687    StreamSetBuffer * deletedBits = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(8), this->getInputBufferBlocks(iBuilder));
688    StreamSetBuffer * deletionCounts = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(), this->getInputBufferBlocks(iBuilder));
689
690    Kernel * delK = mPxDriver.addKernelInstance<PEXTFieldCompressKernel>(iBuilder, 64, 8);
691    mPxDriver.makeKernelCall(delK, {mCompressedBasisBits, mCompressionMarker}, {deletedBits, deletionCounts});
692
693    StreamSetBuffer * compressedBits = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(8), this->getInputBufferBlocks(iBuilder));
694    Kernel * streamCompressionK = mPxDriver.addKernelInstance<StreamCompressKernel>(iBuilder, 64, 8);
695    mPxDriver.makeKernelCall(streamCompressionK, {deletedBits, deletionCounts}, {compressedBits});
696
697    return compressedBits;
698}
699
700int LZ4Generator::get4MbBufferBlocks() {
701    return mLz4BlockSize / codegen::BlockSize;
702}
703
704int LZ4Generator::getInputBufferBlocks(const std::unique_ptr<kernel::KernelBuilder> & b) {
705    return this->get4MbBufferBlocks() * 2 * b->getBitBlockWidth() / 64;
706}
707int LZ4Generator::getDecompressedBufferBlocks(const std::unique_ptr<kernel::KernelBuilder> & b) {
708    return this->get4MbBufferBlocks() * 2 * b->getBitBlockWidth() / 64;
709}
710
711LZ4BlockInfo LZ4Generator::getBlockInfo(const std::unique_ptr<kernel::KernelBuilder> & b) {
712    LZ4BlockInfo blockInfo;
713    blockInfo.isCompress = mPxDriver.addBuffer<StaticBuffer>(b, b->getStreamSetTy(1, 8), this->getInputBufferBlocks(b), 1);
714    blockInfo.blockStart = mPxDriver.addBuffer<StaticBuffer>(b, b->getStreamSetTy(1, 64), this->getInputBufferBlocks(b), 1);
715    blockInfo.blockEnd = mPxDriver.addBuffer<StaticBuffer>(b, b->getStreamSetTy(1, 64), this->getInputBufferBlocks(b), 1);
716
717    Kernel * blockDecoderK = mPxDriver.addKernelInstance<LZ4BlockDecoderKernel>(b);
718    blockDecoderK->setInitialArguments({b->CreateTrunc(mHasBlockChecksum, b->getInt1Ty()), mHeaderSize, mFileSize});
719    mPxDriver.makeKernelCall(blockDecoderK, {mCompressedByteStream}, {blockInfo.isCompress, blockInfo.blockStart, blockInfo.blockEnd});
720
721    return blockInfo;
722}
723
724
725// Kernel Pipeline
Note: See TracBrowser for help on using the repository browser.