[5864] | 1 | |
---|
| 2 | #include "LZ4Generator.h" |
---|
| 3 | |
---|
| 4 | #include <boost/filesystem.hpp> |
---|
| 5 | #include <boost/iostreams/device/mapped_file.hpp> |
---|
| 6 | |
---|
| 7 | |
---|
| 8 | #include <cc/cc_compiler.h> |
---|
| 9 | |
---|
| 10 | #include <kernels/cc_kernel.h> |
---|
| 11 | #include <kernels/s2p_kernel.h> |
---|
| 12 | #include <kernels/p2s_kernel.h> |
---|
| 13 | #include <kernels/source_kernel.h> |
---|
| 14 | #include <kernels/stdout_kernel.h> |
---|
| 15 | #include <kernels/lz4/lz4_generate_deposit_stream.h> |
---|
| 16 | #include <kernels/kernel_builder.h> |
---|
| 17 | #include <kernels/deletion.h> |
---|
| 18 | #include <kernels/swizzle.h> |
---|
| 19 | #include <kernels/pdep_kernel.h> |
---|
[6026] | 20 | #include <kernels/swizzled_multiple_pdep_kernel.h> |
---|
[5906] | 21 | #include <kernels/lz4/lz4_swizzled_match_copy_kernel.h> |
---|
[6039] | 22 | #include <kernels/lz4/lz4_bitstream_match_copy_kernel.h> |
---|
[5948] | 23 | #include <kernels/lz4/lz4_block_decoder.h> |
---|
[5921] | 24 | #include <kernels/lz4/lz4_index_builder.h> |
---|
[6111] | 25 | #include <kernels/lz4/aio/lz4_bytestream_aio.h> |
---|
| 26 | #include <kernels/lz4/aio/lz4_parallel_bytestream_aio.h> |
---|
| 27 | #include <kernels/lz4/aio/lz4_swizzled_aio.h> |
---|
[6118] | 28 | #include <kernels/lz4/aio/lz4_bitstream_aio.h> |
---|
[6132] | 29 | #include <kernels/lz4/aio/lz4_i4_bytestream_aio.h> |
---|
[6029] | 30 | #include <kernels/bitstream_pdep_kernel.h> |
---|
[6039] | 31 | #include <kernels/lz4/lz4_bitstream_not_kernel.h> |
---|
[5864] | 32 | |
---|
| 33 | namespace re { class CC; } |
---|
| 34 | |
---|
| 35 | using namespace llvm; |
---|
| 36 | using namespace parabix; |
---|
| 37 | using namespace kernel; |
---|
| 38 | |
---|
[6111] | 39 | LZ4Generator::LZ4Generator():mPxDriver("lz4d"), mLz4BlockSize(4 * 1024 * 1024) { |
---|
[6044] | 40 | mCompressionMarker = NULL; |
---|
[5864] | 41 | } |
---|
| 42 | |
---|
| 43 | MainFunctionType LZ4Generator::getMainFunc() { |
---|
[6026] | 44 | return reinterpret_cast<MainFunctionType>(mPxDriver.getMain()); |
---|
[5864] | 45 | } |
---|
| 46 | |
---|
[6059] | 47 | void LZ4Generator::generateNewExtractOnlyPipeline(const std::string &outputFile) { |
---|
| 48 | auto & iBuilder = mPxDriver.getBuilder(); |
---|
| 49 | this->generateMainFunc(iBuilder); |
---|
| 50 | |
---|
[6066] | 51 | StreamSetBuffer * const DecompressedByteStream = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 8), this->getDecompressedBufferBlocks(iBuilder)); |
---|
[6059] | 52 | |
---|
| 53 | // GeneratePipeline |
---|
| 54 | this->generateLoadByteStreamAndBitStream(iBuilder); |
---|
| 55 | |
---|
| 56 | //// Decode Block Information |
---|
[6066] | 57 | StreamSetBuffer * const BlockData_IsCompressed = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 8), this->getInputBufferBlocks(iBuilder), 1); |
---|
| 58 | StreamSetBuffer * const BlockData_BlockStart = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 64), this->getInputBufferBlocks(iBuilder), 1); |
---|
| 59 | StreamSetBuffer * const BlockData_BlockEnd = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 64), this->getInputBufferBlocks(iBuilder), 1); |
---|
[6059] | 60 | |
---|
| 61 | //// Generate Helper Markers Extenders, FX, XF |
---|
[6066] | 62 | StreamSetBuffer * const Extenders = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 1), this->getInputBufferBlocks(iBuilder), 1); |
---|
| 63 | mMatchOffsetMarker = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 1), this->getInputBufferBlocks(iBuilder)); |
---|
[6059] | 64 | Kernel * extenderK = mPxDriver.addKernelInstance<ParabixCharacterClassKernelBuilder>(iBuilder, "extenders", std::vector<re::CC *>{re::makeCC(0xFF)}, 8); |
---|
| 65 | mPxDriver.makeKernelCall(extenderK, {mCompressedBasisBits}, {Extenders}); |
---|
| 66 | |
---|
| 67 | |
---|
[6111] | 68 | Kernel * blockDecoderK = mPxDriver.addKernelInstance<LZ4BlockDecoderKernel>(iBuilder); |
---|
[6059] | 69 | blockDecoderK->setInitialArguments({iBuilder->CreateTrunc(mHasBlockChecksum, iBuilder->getInt1Ty()), mHeaderSize, mFileSize}); |
---|
| 70 | mPxDriver.makeKernelCall(blockDecoderK, {mCompressedByteStream}, {BlockData_IsCompressed, BlockData_BlockStart, BlockData_BlockEnd}); |
---|
| 71 | |
---|
| 72 | //// Generate Extract/Deposit Markers, M0_Start, M0_End, MatchOffset |
---|
| 73 | //TODO handle uncompressed part |
---|
[6066] | 74 | StreamSetBuffer * const UncompressedStartPos = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 64), this->getInputBufferBlocks(iBuilder), 1); |
---|
| 75 | StreamSetBuffer * const UncompressedLength = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 64), this->getInputBufferBlocks(iBuilder), 1); |
---|
| 76 | StreamSetBuffer * const UncompressedOutputPos = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 64), this->getInputBufferBlocks(iBuilder), 1); |
---|
[6059] | 77 | |
---|
[6066] | 78 | mCompressionMarker = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 1), this->getInputBufferBlocks(iBuilder)); |
---|
| 79 | mM0Marker = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 1), this->getDecompressedBufferBlocks(iBuilder)); |
---|
| 80 | mDepositMarker = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 1), this->getDecompressedBufferBlocks(iBuilder)); |
---|
[6059] | 81 | |
---|
[6111] | 82 | Kernel* Lz4IndexBuilderK = mPxDriver.addKernelInstance<LZ4IndexBuilderKernel>(iBuilder); |
---|
[6059] | 83 | Lz4IndexBuilderK->setInitialArguments({mFileSize}); |
---|
| 84 | mPxDriver.makeKernelCall( |
---|
| 85 | Lz4IndexBuilderK, |
---|
| 86 | { |
---|
| 87 | mCompressedByteStream, |
---|
| 88 | Extenders, |
---|
| 89 | |
---|
| 90 | // Block Data |
---|
| 91 | BlockData_IsCompressed, |
---|
| 92 | BlockData_BlockStart, |
---|
| 93 | BlockData_BlockEnd |
---|
| 94 | }, { |
---|
| 95 | //Uncompressed Data |
---|
| 96 | UncompressedStartPos, |
---|
| 97 | UncompressedLength, |
---|
| 98 | UncompressedOutputPos, |
---|
| 99 | |
---|
| 100 | mCompressionMarker, |
---|
| 101 | mM0Marker, |
---|
| 102 | mMatchOffsetMarker |
---|
| 103 | }); |
---|
| 104 | |
---|
| 105 | Kernel * generateDepositK = mPxDriver.addKernelInstance<LZ4GenerateDepositStreamKernel>(iBuilder); |
---|
| 106 | mPxDriver.makeKernelCall(generateDepositK, {mM0Marker}, {mDepositMarker}); |
---|
| 107 | |
---|
| 108 | |
---|
| 109 | // Deletion |
---|
[6066] | 110 | StreamSetBuffer * deletedBits = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(8), this->getInputBufferBlocks(iBuilder)); |
---|
| 111 | StreamSetBuffer * deletionCounts = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(), this->getInputBufferBlocks(iBuilder)); |
---|
[6059] | 112 | |
---|
| 113 | Kernel * delK = mPxDriver.addKernelInstance<PEXTFieldCompressKernel>(iBuilder, 64, 8); |
---|
| 114 | mPxDriver.makeKernelCall(delK, {mCompressedBasisBits, mCompressionMarker}, {deletedBits, deletionCounts}); |
---|
| 115 | |
---|
[6066] | 116 | StreamSetBuffer * compressedBits = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(8), this->getInputBufferBlocks(iBuilder)); |
---|
[6059] | 117 | Kernel * streamCompressionK = mPxDriver.addKernelInstance<StreamCompressKernel>(iBuilder, 64, 8); |
---|
| 118 | mPxDriver.makeKernelCall(streamCompressionK, {deletedBits, deletionCounts}, {compressedBits}); |
---|
| 119 | |
---|
| 120 | |
---|
| 121 | StreamSetBuffer * const extractedBits = compressedBits; |
---|
| 122 | |
---|
| 123 | Kernel * p2sK = mPxDriver.addKernelInstance<P2SKernel>(iBuilder); |
---|
| 124 | mPxDriver.makeKernelCall(p2sK, {extractedBits}, {DecompressedByteStream}); |
---|
| 125 | |
---|
| 126 | // -------------------------------------------------------- |
---|
| 127 | // End |
---|
| 128 | Kernel * outK = mPxDriver.addKernelInstance<FileSink>(iBuilder, 8); |
---|
| 129 | |
---|
| 130 | outK->setInitialArguments({iBuilder->GetString(outputFile)}); |
---|
| 131 | mPxDriver.makeKernelCall(outK, {DecompressedByteStream}, {}); |
---|
| 132 | |
---|
| 133 | mPxDriver.generatePipelineIR(); |
---|
| 134 | mPxDriver.deallocateBuffers(); |
---|
| 135 | |
---|
| 136 | iBuilder->CreateRetVoid(); |
---|
| 137 | |
---|
| 138 | mPxDriver.finalizeObject(); |
---|
| 139 | } |
---|
| 140 | |
---|
[6039] | 141 | void LZ4Generator::generateExtractOnlyPipeline(const std::string &outputFile) { |
---|
[6026] | 142 | auto & iBuilder = mPxDriver.getBuilder(); |
---|
[5864] | 143 | this->generateMainFunc(iBuilder); |
---|
| 144 | |
---|
[6066] | 145 | StreamSetBuffer * const DecompressedByteStream = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 8), this->getDecompressedBufferBlocks(iBuilder)); |
---|
[5864] | 146 | |
---|
| 147 | // GeneratePipeline |
---|
| 148 | this->generateLoadByteStreamAndBitStream(iBuilder); |
---|
[6039] | 149 | this->generateExtractAndDepositMarkers(iBuilder); |
---|
| 150 | StreamSetBuffer * const extractedBits = this->generateBitStreamExtractData(iBuilder); |
---|
[5864] | 151 | |
---|
[6039] | 152 | Kernel * p2sK = mPxDriver.addKernelInstance<P2SKernel>(iBuilder); |
---|
| 153 | mPxDriver.makeKernelCall(p2sK, {extractedBits}, {DecompressedByteStream}); |
---|
[5948] | 154 | |
---|
[6039] | 155 | // -------------------------------------------------------- |
---|
| 156 | // End |
---|
| 157 | Kernel * outK = mPxDriver.addKernelInstance<FileSink>(iBuilder, 8); |
---|
[5864] | 158 | |
---|
[6039] | 159 | outK->setInitialArguments({iBuilder->GetString(outputFile)}); |
---|
| 160 | mPxDriver.makeKernelCall(outK, {DecompressedByteStream}, {}); |
---|
[5948] | 161 | |
---|
[6039] | 162 | mPxDriver.generatePipelineIR(); |
---|
| 163 | mPxDriver.deallocateBuffers(); |
---|
| 164 | |
---|
| 165 | iBuilder->CreateRetVoid(); |
---|
| 166 | |
---|
| 167 | mPxDriver.finalizeObject(); |
---|
| 168 | } |
---|
| 169 | |
---|
| 170 | void LZ4Generator::generateSwizzledExtractOnlyPipeline(const std::string &outputFile) { |
---|
| 171 | auto & iBuilder = mPxDriver.getBuilder(); |
---|
| 172 | this->generateMainFunc(iBuilder); |
---|
| 173 | |
---|
[6066] | 174 | StreamSetBuffer * const DecompressedByteStream = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 8), this->getDecompressedBufferBlocks(iBuilder)); |
---|
[6039] | 175 | |
---|
| 176 | // GeneratePipeline |
---|
| 177 | this->generateLoadByteStreamAndBitStream(iBuilder); |
---|
| 178 | this->generateExtractAndDepositMarkers(iBuilder); |
---|
[5864] | 179 | auto swizzle = this->generateSwizzleExtractData(iBuilder); |
---|
| 180 | |
---|
| 181 | |
---|
| 182 | // Produce unswizzled bit streams |
---|
[6066] | 183 | StreamSetBuffer * extractedbits = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(8), this->getInputBufferBlocks(iBuilder)); |
---|
[6026] | 184 | Kernel * unSwizzleK = mPxDriver.addKernelInstance<SwizzleGenerator>(iBuilder, 8, 1, 2); |
---|
[5864] | 185 | |
---|
[6026] | 186 | mPxDriver.makeKernelCall(unSwizzleK, {swizzle.first, swizzle.second}, {extractedbits}); |
---|
[5864] | 187 | |
---|
| 188 | |
---|
[6026] | 189 | Kernel * p2sK = mPxDriver.addKernelInstance<P2SKernel>(iBuilder); |
---|
| 190 | mPxDriver.makeKernelCall(p2sK, {extractedbits}, {DecompressedByteStream}); |
---|
[5864] | 191 | |
---|
| 192 | // -------------------------------------------------------- |
---|
| 193 | // End |
---|
[6026] | 194 | Kernel * outK = mPxDriver.addKernelInstance<FileSink>(iBuilder, 8); |
---|
[5864] | 195 | |
---|
| 196 | outK->setInitialArguments({iBuilder->GetString(outputFile)}); |
---|
[6026] | 197 | mPxDriver.makeKernelCall(outK, {DecompressedByteStream}, {}); |
---|
[5864] | 198 | |
---|
[6026] | 199 | mPxDriver.generatePipelineIR(); |
---|
| 200 | mPxDriver.deallocateBuffers(); |
---|
[5864] | 201 | |
---|
| 202 | iBuilder->CreateRetVoid(); |
---|
| 203 | |
---|
[6026] | 204 | mPxDriver.finalizeObject(); |
---|
[5864] | 205 | } |
---|
| 206 | |
---|
[5874] | 207 | void LZ4Generator::generateExtractAndDepositOnlyPipeline(const std::string &outputFile) { |
---|
[6026] | 208 | auto & iBuilder = mPxDriver.getBuilder(); |
---|
[5874] | 209 | this->generateMainFunc(iBuilder); |
---|
| 210 | |
---|
[6066] | 211 | StreamSetBuffer * const DecompressedByteStream = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 8), this->getDecompressedBufferBlocks(iBuilder)); |
---|
[5874] | 212 | |
---|
| 213 | // GeneratePipeline |
---|
| 214 | this->generateLoadByteStreamAndBitStream(iBuilder); |
---|
| 215 | this->generateExtractAndDepositMarkers(iBuilder); |
---|
[6039] | 216 | StreamSetBuffer * const extractedBits = this->generateBitStreamExtractData(iBuilder); |
---|
[5874] | 217 | |
---|
[6066] | 218 | StreamSetBuffer * depositedBits = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(8), this->getDecompressedBufferBlocks(iBuilder)); |
---|
[6029] | 219 | Kernel * bitStreamPDEPk = mPxDriver.addKernelInstance<BitStreamPDEPKernel>(iBuilder, 8); |
---|
| 220 | mPxDriver.makeKernelCall(bitStreamPDEPk, {mDepositMarker, extractedBits}, {depositedBits}); |
---|
| 221 | |
---|
[6039] | 222 | Kernel * p2sK = mPxDriver.addKernelInstance<P2SKernel>(iBuilder); |
---|
| 223 | mPxDriver.makeKernelCall(p2sK, {depositedBits}, {DecompressedByteStream}); |
---|
[6029] | 224 | |
---|
[6039] | 225 | // -------------------------------------------------------- |
---|
| 226 | // End |
---|
| 227 | Kernel * outK = mPxDriver.addKernelInstance<FileSink>(iBuilder, 8); |
---|
| 228 | |
---|
| 229 | outK->setInitialArguments({iBuilder->GetString(outputFile)}); |
---|
| 230 | mPxDriver.makeKernelCall(outK, {DecompressedByteStream}, {}); |
---|
| 231 | |
---|
| 232 | mPxDriver.generatePipelineIR(); |
---|
| 233 | mPxDriver.deallocateBuffers(); |
---|
| 234 | |
---|
| 235 | iBuilder->CreateRetVoid(); |
---|
| 236 | |
---|
| 237 | mPxDriver.finalizeObject(); |
---|
| 238 | } |
---|
| 239 | |
---|
| 240 | void LZ4Generator::generateSwizzledExtractAndDepositOnlyPipeline(const std::string &outputFile) { |
---|
| 241 | auto & iBuilder = mPxDriver.getBuilder(); |
---|
| 242 | this->generateMainFunc(iBuilder); |
---|
| 243 | |
---|
[6066] | 244 | StreamSetBuffer * const DecompressedByteStream = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 8), this->getDecompressedBufferBlocks(iBuilder)); |
---|
[6039] | 245 | |
---|
| 246 | // GeneratePipeline |
---|
| 247 | this->generateLoadByteStreamAndBitStream(iBuilder); |
---|
| 248 | this->generateExtractAndDepositMarkers(iBuilder); |
---|
| 249 | |
---|
| 250 | auto swizzle = this->generateSwizzleExtractData(iBuilder); |
---|
| 251 | |
---|
[6066] | 252 | StreamSetBuffer * depositedSwizzle0 = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(4), this->getInputBufferBlocks(iBuilder), 1); |
---|
| 253 | StreamSetBuffer * depositedSwizzle1 = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(4), this->getInputBufferBlocks(iBuilder), 1); |
---|
[5874] | 254 | |
---|
[6026] | 255 | Kernel * multiplePdepK = mPxDriver.addKernelInstance<SwizzledMultiplePDEPkernel>(iBuilder, 4, 2); |
---|
| 256 | mPxDriver.makeKernelCall(multiplePdepK, {mDepositMarker, swizzle.first, swizzle.second}, {depositedSwizzle0, depositedSwizzle1}); |
---|
[5874] | 257 | |
---|
| 258 | // Produce unswizzled bit streams |
---|
[6066] | 259 | StreamSetBuffer * depositedBits = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(8), this->getInputBufferBlocks(iBuilder)); |
---|
[6026] | 260 | Kernel * unSwizzleK = mPxDriver.addKernelInstance<SwizzleGenerator>(iBuilder, 8, 1, 2); |
---|
[6029] | 261 | mPxDriver.makeKernelCall(unSwizzleK, {depositedSwizzle0, depositedSwizzle1}, {depositedBits}); |
---|
[5874] | 262 | |
---|
[6026] | 263 | Kernel * p2sK = mPxDriver.addKernelInstance<P2SKernel>(iBuilder); |
---|
[6029] | 264 | mPxDriver.makeKernelCall(p2sK, {depositedBits}, {DecompressedByteStream}); |
---|
[5874] | 265 | |
---|
| 266 | // -------------------------------------------------------- |
---|
| 267 | // End |
---|
[6026] | 268 | Kernel * outK = mPxDriver.addKernelInstance<FileSink>(iBuilder, 8); |
---|
[5874] | 269 | outK->setInitialArguments({iBuilder->GetString(outputFile)}); |
---|
[6026] | 270 | mPxDriver.makeKernelCall(outK, {DecompressedByteStream}, {}); |
---|
[5874] | 271 | |
---|
[6026] | 272 | mPxDriver.generatePipelineIR(); |
---|
| 273 | mPxDriver.deallocateBuffers(); |
---|
[5874] | 274 | |
---|
| 275 | iBuilder->CreateRetVoid(); |
---|
| 276 | |
---|
[6026] | 277 | mPxDriver.finalizeObject(); |
---|
[5874] | 278 | } |
---|
| 279 | |
---|
[6039] | 280 | void LZ4Generator::generatePipeline(const std::string &outputFile) { |
---|
[6026] | 281 | auto & iBuilder = mPxDriver.getBuilder(); |
---|
[5864] | 282 | this->generateMainFunc(iBuilder); |
---|
| 283 | |
---|
[6066] | 284 | StreamSetBuffer * const DecompressedByteStream = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 8), this->getDecompressedBufferBlocks(iBuilder)); |
---|
[5864] | 285 | |
---|
| 286 | // GeneratePipeline |
---|
| 287 | this->generateLoadByteStreamAndBitStream(iBuilder); |
---|
| 288 | this->generateExtractAndDepositMarkers(iBuilder); |
---|
[6039] | 289 | StreamSetBuffer * const extractedBits = this->generateBitStreamExtractData(iBuilder); |
---|
[5864] | 290 | |
---|
[6066] | 291 | StreamSetBuffer * depositedBits = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(8), this->getDecompressedBufferBlocks(iBuilder)); |
---|
[6039] | 292 | Kernel * bitStreamPDEPk = mPxDriver.addKernelInstance<BitStreamPDEPKernel>(iBuilder, 8); |
---|
| 293 | mPxDriver.makeKernelCall(bitStreamPDEPk, {mDepositMarker, extractedBits}, {depositedBits}); |
---|
| 294 | |
---|
[6066] | 295 | StreamSetBuffer * matchCopiedBits = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(8), this->getInputBufferBlocks(iBuilder)); |
---|
[6039] | 296 | Kernel * bitStreamMatchCopyK = mPxDriver.addKernelInstance<LZ4BitStreamMatchCopyKernel>(iBuilder, 8); |
---|
| 297 | mPxDriver.makeKernelCall(bitStreamMatchCopyK, {mMatchOffsetMarker, mM0Marker, mCompressedByteStream, depositedBits}, {matchCopiedBits}); |
---|
| 298 | |
---|
| 299 | Kernel * p2sK = mPxDriver.addKernelInstance<P2SKernel>(iBuilder); |
---|
| 300 | mPxDriver.makeKernelCall(p2sK, {matchCopiedBits}, {DecompressedByteStream}); |
---|
| 301 | |
---|
| 302 | // -------------------------------------------------------- |
---|
| 303 | // End |
---|
| 304 | Kernel * outK = mPxDriver.addKernelInstance<FileSink>(iBuilder, 8); |
---|
| 305 | |
---|
| 306 | outK->setInitialArguments({iBuilder->GetString(outputFile)}); |
---|
| 307 | mPxDriver.makeKernelCall(outK, {DecompressedByteStream}, {}); |
---|
| 308 | |
---|
| 309 | mPxDriver.generatePipelineIR(); |
---|
| 310 | mPxDriver.deallocateBuffers(); |
---|
| 311 | |
---|
| 312 | iBuilder->CreateRetVoid(); |
---|
| 313 | |
---|
| 314 | mPxDriver.finalizeObject(); |
---|
| 315 | } |
---|
| 316 | |
---|
| 317 | void LZ4Generator::generateSwizzledPipeline(const std::string &outputFile) { |
---|
| 318 | auto & iBuilder = mPxDriver.getBuilder(); |
---|
| 319 | this->generateMainFunc(iBuilder); |
---|
| 320 | |
---|
[6066] | 321 | StreamSetBuffer * const DecompressedByteStream = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 8), this->getDecompressedBufferBlocks(iBuilder)); |
---|
[6039] | 322 | |
---|
| 323 | // GeneratePipeline |
---|
| 324 | this->generateLoadByteStreamAndBitStream(iBuilder); |
---|
| 325 | this->generateExtractAndDepositMarkers(iBuilder); |
---|
| 326 | |
---|
[5864] | 327 | auto swizzle = this->generateSwizzleExtractData(iBuilder); |
---|
| 328 | |
---|
[6066] | 329 | StreamSetBuffer * depositedSwizzle0 = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(4), this->getDecompressedBufferBlocks(iBuilder), 1); |
---|
| 330 | StreamSetBuffer * depositedSwizzle1 = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(4), this->getDecompressedBufferBlocks(iBuilder), 1); |
---|
[5864] | 331 | |
---|
[6026] | 332 | Kernel * multiplePdepK = mPxDriver.addKernelInstance<SwizzledMultiplePDEPkernel>(iBuilder, 4, 2); |
---|
| 333 | mPxDriver.makeKernelCall(multiplePdepK, {mDepositMarker, swizzle.first, swizzle.second}, {depositedSwizzle0, depositedSwizzle1}); |
---|
[5874] | 334 | |
---|
[6066] | 335 | StreamSetBuffer * matchCopiedSwizzle0 = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(4), this->getDecompressedBufferBlocks(iBuilder), 1); |
---|
| 336 | StreamSetBuffer * matchCopiedSwizzle1 = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(4), this->getDecompressedBufferBlocks(iBuilder), 1); |
---|
[5906] | 337 | |
---|
[6026] | 338 | Kernel * swizzledMatchCopyK = mPxDriver.addKernelInstance<LZ4SwizzledMatchCopyKernel>(iBuilder, 4, 2, 4); |
---|
| 339 | mPxDriver.makeKernelCall(swizzledMatchCopyK, {mMatchOffsetMarker, mM0Marker, mCompressedByteStream, depositedSwizzle0, depositedSwizzle1}, {matchCopiedSwizzle0, matchCopiedSwizzle1}); |
---|
[5906] | 340 | |
---|
| 341 | |
---|
[5864] | 342 | // Produce unswizzled bit streams |
---|
[6066] | 343 | StreamSetBuffer * matchCopiedBits = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(8), this->getInputBufferBlocks(iBuilder)); |
---|
[6026] | 344 | Kernel * unSwizzleK = mPxDriver.addKernelInstance<SwizzleGenerator>(iBuilder, 8, 1, 2); |
---|
[6039] | 345 | mPxDriver.makeKernelCall(unSwizzleK, {matchCopiedSwizzle0, matchCopiedSwizzle1}, {matchCopiedBits}); |
---|
[5864] | 346 | |
---|
| 347 | |
---|
[6026] | 348 | Kernel * p2sK = mPxDriver.addKernelInstance<P2SKernel>(iBuilder); |
---|
[6039] | 349 | mPxDriver.makeKernelCall(p2sK, {matchCopiedBits}, {DecompressedByteStream}); |
---|
[5864] | 350 | |
---|
| 351 | // -------------------------------------------------------- |
---|
| 352 | // End |
---|
[6026] | 353 | Kernel * outK = mPxDriver.addKernelInstance<FileSink>(iBuilder, 8); |
---|
[5864] | 354 | outK->setInitialArguments({iBuilder->GetString(outputFile)}); |
---|
[6026] | 355 | mPxDriver.makeKernelCall(outK, {DecompressedByteStream}, {}); |
---|
[5864] | 356 | |
---|
[6026] | 357 | mPxDriver.generatePipelineIR(); |
---|
| 358 | mPxDriver.deallocateBuffers(); |
---|
[5864] | 359 | |
---|
| 360 | iBuilder->CreateRetVoid(); |
---|
| 361 | |
---|
[6026] | 362 | mPxDriver.finalizeObject(); |
---|
[5864] | 363 | } |
---|
| 364 | |
---|
| 365 | void LZ4Generator::generateMainFunc(const std::unique_ptr<kernel::KernelBuilder> & iBuilder) { |
---|
| 366 | Module * M = iBuilder->getModule(); |
---|
| 367 | Type * const sizeTy = iBuilder->getSizeTy(); |
---|
| 368 | Type * const boolTy = iBuilder->getIntNTy(sizeof(bool) * 8); |
---|
| 369 | Type * const voidTy = iBuilder->getVoidTy(); |
---|
| 370 | Type * const inputType = iBuilder->getInt8PtrTy(); |
---|
| 371 | |
---|
| 372 | Function * const main = cast<Function>(M->getOrInsertFunction("Main", voidTy, inputType, sizeTy, sizeTy, boolTy, nullptr)); |
---|
| 373 | main->setCallingConv(CallingConv::C); |
---|
| 374 | Function::arg_iterator args = main->arg_begin(); |
---|
[6026] | 375 | mInputStream = &*(args++); |
---|
| 376 | mInputStream->setName("input"); |
---|
[5864] | 377 | |
---|
[6026] | 378 | mHeaderSize = &*(args++); |
---|
| 379 | mHeaderSize->setName("mHeaderSize"); |
---|
[5864] | 380 | |
---|
[6026] | 381 | mFileSize = &*(args++); |
---|
| 382 | mFileSize->setName("mFileSize"); |
---|
[5864] | 383 | |
---|
[6026] | 384 | mHasBlockChecksum = &*(args++); |
---|
| 385 | mHasBlockChecksum->setName("mHasBlockChecksum"); |
---|
[6020] | 386 | // TODO for now, we do not handle blockCheckSum |
---|
[6026] | 387 | mHasBlockChecksum = iBuilder->getInt1(false); |
---|
[5864] | 388 | |
---|
| 389 | iBuilder->SetInsertPoint(BasicBlock::Create(M->getContext(), "entry", main, 0)); |
---|
| 390 | } |
---|
| 391 | |
---|
[6064] | 392 | void LZ4Generator::generateLoadByteStream(const std::unique_ptr<kernel::KernelBuilder> & iBuilder) { |
---|
| 393 | mCompressedByteStream = mPxDriver.addBuffer<ExternalBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 8)); |
---|
| 394 | kernel::Kernel * sourceK = mPxDriver.addKernelInstance<MemorySourceKernel>(iBuilder); |
---|
| 395 | sourceK->setInitialArguments({mInputStream, mFileSize}); |
---|
| 396 | mPxDriver.makeKernelCall(sourceK, {}, {mCompressedByteStream}); |
---|
| 397 | } |
---|
[5864] | 398 | void LZ4Generator::generateLoadByteStreamAndBitStream(const std::unique_ptr<kernel::KernelBuilder> & iBuilder) { |
---|
[6047] | 399 | mCompressedByteStream = mPxDriver.addBuffer<ExternalBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 8)); |
---|
[6066] | 400 | mCompressedBasisBits = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(8, 1), this->getInputBufferBlocks(iBuilder)); |
---|
[5864] | 401 | |
---|
[6026] | 402 | kernel::Kernel * sourceK = mPxDriver.addKernelInstance<MemorySourceKernel>(iBuilder); |
---|
| 403 | sourceK->setInitialArguments({mInputStream, mFileSize}); |
---|
| 404 | mPxDriver.makeKernelCall(sourceK, {}, {mCompressedByteStream}); |
---|
[6119] | 405 | Kernel * s2pk = mPxDriver.addKernelInstance<S2PKernel>(iBuilder, cc::BitNumbering::BigEndian); |
---|
[6026] | 406 | mPxDriver.makeKernelCall(s2pk, {mCompressedByteStream}, {mCompressedBasisBits}); |
---|
[5864] | 407 | } |
---|
| 408 | |
---|
[6118] | 409 | StreamSetBuffer * LZ4Generator::generateBitStreamAIODecompression(const std::unique_ptr<kernel::KernelBuilder> & iBuilder) { |
---|
[6119] | 410 | return this->convertCompressedBitsStreamWithBitStreamAioApproach({mCompressedBasisBits}, "combined")[0]; |
---|
| 411 | } |
---|
| 412 | |
---|
| 413 | std::vector<StreamSetBuffer*> LZ4Generator::convertCompressedBitsStreamWithBitStreamAioApproach( |
---|
| 414 | std::vector<StreamSetBuffer*> compressedBitStreams, std::string prefix) { |
---|
| 415 | auto mGrepDriver = &mPxDriver; |
---|
| 416 | auto & iBuilder = mGrepDriver->getBuilder(); |
---|
| 417 | |
---|
| 418 | //// Decode Block Information |
---|
[6118] | 419 | StreamSetBuffer * const BlockData_IsCompressed = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 8), this->getInputBufferBlocks(iBuilder), 1); |
---|
| 420 | StreamSetBuffer * const BlockData_BlockStart = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 64), this->getInputBufferBlocks(iBuilder), 1); |
---|
| 421 | StreamSetBuffer * const BlockData_BlockEnd = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 64), this->getInputBufferBlocks(iBuilder), 1); |
---|
| 422 | |
---|
| 423 | |
---|
| 424 | Kernel * blockDecoderK = mPxDriver.addKernelInstance<LZ4BlockDecoderKernel>(iBuilder); |
---|
| 425 | blockDecoderK->setInitialArguments({iBuilder->CreateTrunc(mHasBlockChecksum, iBuilder->getInt1Ty()), mHeaderSize, mFileSize}); |
---|
| 426 | mPxDriver.makeKernelCall(blockDecoderK, {mCompressedByteStream}, {BlockData_IsCompressed, BlockData_BlockStart, BlockData_BlockEnd}); |
---|
| 427 | |
---|
| 428 | |
---|
[6132] | 429 | |
---|
| 430 | if (compressedBitStreams[0]->getNumOfStreams() == 4) { |
---|
| 431 | StreamSetBuffer* twistedCharClasses = mGrepDriver->addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 4), this->getInputBufferBlocks(iBuilder)); |
---|
| 432 | kernel::Kernel* twistK = mGrepDriver->addKernelInstance<kernel::P2S4StreamByPDEP>(iBuilder); |
---|
| 433 | mGrepDriver->makeKernelCall(twistK, {compressedBitStreams[0]}, {twistedCharClasses}); |
---|
| 434 | |
---|
| 435 | |
---|
| 436 | StreamSetBuffer* uncompressedTwistedCharClasses = mGrepDriver->addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 4), this->getInputBufferBlocks(iBuilder)); |
---|
| 437 | Kernel* lz4I4AioK = mPxDriver.addKernelInstance<LZ4I4ByteStreamAioKernel>(iBuilder); |
---|
| 438 | lz4I4AioK->setInitialArguments({mFileSize}); |
---|
| 439 | mGrepDriver->makeKernelCall(lz4I4AioK, { |
---|
| 440 | mCompressedByteStream, |
---|
| 441 | |
---|
| 442 | // Block Data |
---|
| 443 | BlockData_IsCompressed, |
---|
| 444 | BlockData_BlockStart, |
---|
| 445 | BlockData_BlockEnd, |
---|
| 446 | |
---|
| 447 | twistedCharClasses |
---|
| 448 | }, { |
---|
| 449 | uncompressedTwistedCharClasses |
---|
| 450 | }); |
---|
| 451 | |
---|
| 452 | StreamSetBuffer* untwistedCharClasses = mGrepDriver->addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(4), this->getInputBufferBlocks(iBuilder)); |
---|
| 453 | kernel::Kernel* untwistK = mGrepDriver->addKernelInstance<kernel::S2P4StreamByPEXTKernel>(iBuilder); |
---|
| 454 | mGrepDriver->makeKernelCall(untwistK, {uncompressedTwistedCharClasses}, {untwistedCharClasses}); |
---|
| 455 | return {untwistedCharClasses}; |
---|
| 456 | } |
---|
| 457 | |
---|
| 458 | |
---|
| 459 | |
---|
| 460 | |
---|
[6119] | 461 | std::vector<StreamSetBuffer *> inputStreams = { |
---|
| 462 | mCompressedByteStream, |
---|
[6118] | 463 | |
---|
[6119] | 464 | // Block Data |
---|
| 465 | BlockData_IsCompressed, |
---|
| 466 | BlockData_BlockStart, |
---|
| 467 | BlockData_BlockEnd |
---|
| 468 | }; |
---|
[6118] | 469 | |
---|
[6119] | 470 | std::vector<StreamSetBuffer *> outputStream; |
---|
| 471 | std::vector<unsigned> numbersOfStreams; |
---|
[6118] | 472 | |
---|
[6119] | 473 | for (unsigned i = 0; i < compressedBitStreams.size(); i++) { |
---|
| 474 | unsigned numOfStreams = compressedBitStreams[i]->getNumOfStreams(); |
---|
| 475 | numbersOfStreams.push_back(numOfStreams); |
---|
| 476 | inputStreams.push_back(compressedBitStreams[i]); |
---|
| 477 | outputStream.push_back(mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(numOfStreams, 1), this->getInputBufferBlocks(iBuilder))); |
---|
| 478 | } |
---|
[6118] | 479 | |
---|
[6119] | 480 | Kernel* lz4AioK = mPxDriver.addKernelInstance<LZ4BitStreamAioKernel>(iBuilder, numbersOfStreams); |
---|
| 481 | lz4AioK->setInitialArguments({mFileSize}); |
---|
| 482 | mPxDriver.makeKernelCall(lz4AioK, inputStreams, outputStream); |
---|
| 483 | |
---|
| 484 | return outputStream; |
---|
[6118] | 485 | } |
---|
| 486 | |
---|
[6119] | 487 | |
---|
[6059] | 488 | StreamSetBuffer * LZ4Generator::generateSwizzledAIODecompression(const std::unique_ptr<kernel::KernelBuilder> & iBuilder) { |
---|
[5864] | 489 | //// Decode Block Information |
---|
[6066] | 490 | StreamSetBuffer * const BlockData_IsCompressed = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 8), this->getInputBufferBlocks(iBuilder), 1); |
---|
| 491 | StreamSetBuffer * const BlockData_BlockStart = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 64), this->getInputBufferBlocks(iBuilder), 1); |
---|
| 492 | StreamSetBuffer * const BlockData_BlockEnd = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 64), this->getInputBufferBlocks(iBuilder), 1); |
---|
[5864] | 493 | |
---|
| 494 | //// Generate Helper Markers Extenders, FX, XF |
---|
[6111] | 495 | // StreamSetBuffer * const Extenders = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 1), this->getInputBufferBlocks(iBuilder), 1); |
---|
| 496 | // mMatchOffsetMarker = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 1), this->getInputBufferBlocks(iBuilder)); |
---|
| 497 | // Kernel * extenderK = mPxDriver.addKernelInstance<ParabixCharacterClassKernelBuilder>(iBuilder, "extenders", std::vector<re::CC *>{re::makeCC(0xFF)}, 8); |
---|
| 498 | // mPxDriver.makeKernelCall(extenderK, {mCompressedBasisBits}, {Extenders}); |
---|
[5864] | 499 | |
---|
[6059] | 500 | |
---|
[6111] | 501 | Kernel * blockDecoderK = mPxDriver.addKernelInstance<LZ4BlockDecoderKernel>(iBuilder); |
---|
[6059] | 502 | blockDecoderK->setInitialArguments({iBuilder->CreateTrunc(mHasBlockChecksum, iBuilder->getInt1Ty()), mHeaderSize, mFileSize}); |
---|
| 503 | mPxDriver.makeKernelCall(blockDecoderK, {mCompressedByteStream}, {BlockData_IsCompressed, BlockData_BlockStart, BlockData_BlockEnd}); |
---|
| 504 | |
---|
| 505 | |
---|
| 506 | // Produce unswizzled bit streams |
---|
[6066] | 507 | StreamSetBuffer * u16Swizzle0 = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(4), this->getInputBufferBlocks(iBuilder), 1); |
---|
| 508 | StreamSetBuffer * u16Swizzle1 = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(4), this->getInputBufferBlocks(iBuilder), 1); |
---|
[6059] | 509 | Kernel * unSwizzleK = mPxDriver.addKernelInstance<SwizzleGenerator>(iBuilder, 8, 2, 1, 64, "source"); |
---|
| 510 | mPxDriver.makeKernelCall(unSwizzleK, {mCompressedBasisBits}, {u16Swizzle0, u16Swizzle1}); |
---|
| 511 | |
---|
| 512 | |
---|
| 513 | |
---|
[6066] | 514 | StreamSetBuffer * decompressedSwizzled0 = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(4), this->getInputBufferBlocks(iBuilder), 1); |
---|
| 515 | StreamSetBuffer * decompressedSwizzled1 = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(4), this->getInputBufferBlocks(iBuilder), 1); |
---|
[6059] | 516 | |
---|
| 517 | |
---|
| 518 | Kernel* lz4AioK = mPxDriver.addKernelInstance<LZ4SwizzledAioKernel>(iBuilder, 4, 2, 4); |
---|
| 519 | lz4AioK->setInitialArguments({mFileSize}); |
---|
| 520 | mPxDriver.makeKernelCall( |
---|
| 521 | lz4AioK, |
---|
| 522 | { |
---|
| 523 | mCompressedByteStream, |
---|
| 524 | |
---|
[6111] | 525 | // Extenders, |
---|
| 526 | |
---|
[6059] | 527 | // Block Data |
---|
| 528 | BlockData_IsCompressed, |
---|
| 529 | BlockData_BlockStart, |
---|
| 530 | BlockData_BlockEnd, |
---|
| 531 | |
---|
| 532 | u16Swizzle0, |
---|
| 533 | u16Swizzle1 |
---|
| 534 | }, { |
---|
| 535 | decompressedSwizzled0, |
---|
| 536 | decompressedSwizzled1 |
---|
| 537 | }); |
---|
| 538 | |
---|
| 539 | |
---|
[6066] | 540 | StreamSetBuffer * const decompressionBitStream = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(8, 1), this->getDecompressedBufferBlocks(iBuilder)); |
---|
[6059] | 541 | |
---|
| 542 | Kernel * unSwizzleK2 = mPxDriver.addKernelInstance<SwizzleGenerator>(iBuilder, 8, 1, 2); |
---|
| 543 | mPxDriver.makeKernelCall(unSwizzleK2, {decompressedSwizzled0, decompressedSwizzled1}, {decompressionBitStream}); |
---|
| 544 | |
---|
| 545 | return decompressionBitStream; |
---|
| 546 | } |
---|
| 547 | |
---|
[6111] | 548 | parabix::StreamSetBuffer * LZ4Generator::generateParallelAIODecompression(const std::unique_ptr<kernel::KernelBuilder> & iBuilder, bool enableGather, bool enableScatter, int minParallelLevel) { |
---|
[6064] | 549 | //// Decode Block Information |
---|
[6066] | 550 | StreamSetBuffer * const BlockData_IsCompressed = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 8), this->getInputBufferBlocks(iBuilder), 1); |
---|
| 551 | StreamSetBuffer * const BlockData_BlockStart = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 64), this->getInputBufferBlocks(iBuilder), 1); |
---|
| 552 | StreamSetBuffer * const BlockData_BlockEnd = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 64), this->getInputBufferBlocks(iBuilder), 1); |
---|
[6059] | 553 | |
---|
[6064] | 554 | //// Generate Helper Markers Extenders |
---|
[6070] | 555 | // StreamSetBuffer * const Extenders = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 1), this->getInputBufferBlocks(iBuilder), 1); |
---|
| 556 | // mMatchOffsetMarker = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 1), this->getInputBufferBlocks(iBuilder)); |
---|
| 557 | // Kernel * extenderK = mPxDriver.addKernelInstance<ParabixCharacterClassKernelBuilder>(iBuilder, "extenders", std::vector<re::CC *>{re::makeCC(0xFF)}, 8); |
---|
| 558 | // mPxDriver.makeKernelCall(extenderK, {mCompressedBasisBits}, {Extenders}); |
---|
[6059] | 559 | |
---|
[6111] | 560 | Kernel * blockDecoderK = mPxDriver.addKernelInstance<LZ4BlockDecoderKernel>(iBuilder); |
---|
[6064] | 561 | blockDecoderK->setInitialArguments({iBuilder->CreateTrunc(mHasBlockChecksum, iBuilder->getInt1Ty()), mHeaderSize, mFileSize}); |
---|
| 562 | mPxDriver.makeKernelCall(blockDecoderK, {mCompressedByteStream}, {BlockData_IsCompressed, BlockData_BlockStart, BlockData_BlockEnd}); |
---|
| 563 | |
---|
| 564 | |
---|
[6066] | 565 | StreamSetBuffer * const decompressionByteStream = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 8), this->getDecompressedBufferBlocks(iBuilder), 1); |
---|
[6064] | 566 | |
---|
[6111] | 567 | Kernel* lz4AioK = mPxDriver.addKernelInstance<LZ4ParallelByteStreamAioKernel>(iBuilder, mLz4BlockSize, enableGather, enableScatter, minParallelLevel); |
---|
[6064] | 568 | lz4AioK->setInitialArguments({mFileSize}); |
---|
| 569 | mPxDriver.makeKernelCall( |
---|
| 570 | lz4AioK, |
---|
| 571 | { |
---|
| 572 | mCompressedByteStream, |
---|
| 573 | |
---|
[6111] | 574 | // Extenders, |
---|
| 575 | |
---|
[6064] | 576 | // Block Data |
---|
| 577 | BlockData_IsCompressed, |
---|
| 578 | BlockData_BlockStart, |
---|
| 579 | BlockData_BlockEnd |
---|
| 580 | }, { |
---|
| 581 | decompressionByteStream |
---|
| 582 | }); |
---|
| 583 | |
---|
| 584 | return decompressionByteStream; |
---|
| 585 | |
---|
| 586 | } |
---|
| 587 | |
---|
[6059] | 588 | StreamSetBuffer * LZ4Generator::generateAIODecompression(const std::unique_ptr<kernel::KernelBuilder> & iBuilder) { |
---|
[6132] | 589 | LZ4BlockInfo blockInfo = this->getBlockInfo(iBuilder); |
---|
[6059] | 590 | |
---|
[6066] | 591 | StreamSetBuffer * const decompressionByteStream = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 8), this->getDecompressedBufferBlocks(iBuilder), 1); |
---|
[6059] | 592 | Kernel* lz4AioK = mPxDriver.addKernelInstance<LZ4ByteStreamAioKernel>(iBuilder); |
---|
| 593 | lz4AioK->setInitialArguments({mFileSize}); |
---|
| 594 | mPxDriver.makeKernelCall( |
---|
| 595 | lz4AioK, |
---|
| 596 | { |
---|
| 597 | mCompressedByteStream, |
---|
[5974] | 598 | |
---|
[6059] | 599 | // Block Data |
---|
[6132] | 600 | blockInfo.isCompress, |
---|
| 601 | blockInfo.blockStart, |
---|
| 602 | blockInfo.blockEnd |
---|
[6059] | 603 | }, { |
---|
| 604 | decompressionByteStream |
---|
| 605 | }); |
---|
| 606 | |
---|
| 607 | return decompressionByteStream; |
---|
| 608 | } |
---|
| 609 | |
---|
| 610 | void LZ4Generator::generateExtractAndDepositMarkers(const std::unique_ptr<kernel::KernelBuilder> & iBuilder) { |
---|
| 611 | //// Decode Block Information |
---|
[6066] | 612 | StreamSetBuffer * const BlockData_IsCompressed = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 8), this->getInputBufferBlocks(iBuilder), 1); |
---|
| 613 | StreamSetBuffer * const BlockData_BlockStart = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 64), this->getInputBufferBlocks(iBuilder), 1); |
---|
| 614 | StreamSetBuffer * const BlockData_BlockEnd = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 64), this->getInputBufferBlocks(iBuilder), 1); |
---|
[6059] | 615 | |
---|
| 616 | //// Generate Helper Markers Extenders, FX, XF |
---|
[6066] | 617 | StreamSetBuffer * const Extenders = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 1), this->getInputBufferBlocks(iBuilder), 1); |
---|
| 618 | mMatchOffsetMarker = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 1), this->getInputBufferBlocks(iBuilder)); |
---|
[6059] | 619 | Kernel * extenderK = mPxDriver.addKernelInstance<ParabixCharacterClassKernelBuilder>(iBuilder, "extenders", std::vector<re::CC *>{re::makeCC(0xFF)}, 8); |
---|
| 620 | mPxDriver.makeKernelCall(extenderK, {mCompressedBasisBits}, {Extenders}); |
---|
| 621 | |
---|
| 622 | |
---|
[6111] | 623 | Kernel * blockDecoderK = mPxDriver.addKernelInstance<LZ4BlockDecoderKernel>(iBuilder); |
---|
[6059] | 624 | blockDecoderK->setInitialArguments({iBuilder->CreateTrunc(mHasBlockChecksum, iBuilder->getInt1Ty()), mHeaderSize, mFileSize}); |
---|
| 625 | mPxDriver.makeKernelCall(blockDecoderK, {mCompressedByteStream}, {BlockData_IsCompressed, BlockData_BlockStart, BlockData_BlockEnd}); |
---|
| 626 | |
---|
[5864] | 627 | //// Generate Extract/Deposit Markers, M0_Start, M0_End, MatchOffset |
---|
| 628 | |
---|
| 629 | //TODO handle uncompressed part |
---|
[6066] | 630 | StreamSetBuffer * const UncompressedStartPos = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 64), this->getInputBufferBlocks(iBuilder), 1); |
---|
| 631 | StreamSetBuffer * const UncompressedLength = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 64), this->getInputBufferBlocks(iBuilder), 1); |
---|
| 632 | StreamSetBuffer * const UncompressedOutputPos = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 64), this->getInputBufferBlocks(iBuilder), 1); |
---|
[5864] | 633 | |
---|
[6066] | 634 | mDeletionMarker = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 1), this->getInputBufferBlocks(iBuilder)); |
---|
| 635 | mM0Marker = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 1), this->getDecompressedBufferBlocks(iBuilder)); |
---|
| 636 | mDepositMarker = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 1), this->getDecompressedBufferBlocks(iBuilder)); |
---|
[5864] | 637 | |
---|
[6026] | 638 | Kernel* Lz4IndexBuilderK = mPxDriver.addKernelInstance<LZ4IndexBuilderKernel>(iBuilder); |
---|
| 639 | Lz4IndexBuilderK->setInitialArguments({mFileSize}); |
---|
| 640 | mPxDriver.makeKernelCall( |
---|
[5948] | 641 | Lz4IndexBuilderK, |
---|
[5864] | 642 | { |
---|
[6026] | 643 | mCompressedByteStream, |
---|
[5864] | 644 | Extenders, |
---|
| 645 | |
---|
| 646 | // Block Data |
---|
| 647 | BlockData_IsCompressed, |
---|
| 648 | BlockData_BlockStart, |
---|
| 649 | BlockData_BlockEnd |
---|
| 650 | }, { |
---|
| 651 | //Uncompressed Data |
---|
| 652 | UncompressedStartPos, |
---|
| 653 | UncompressedLength, |
---|
| 654 | UncompressedOutputPos, |
---|
| 655 | |
---|
[6026] | 656 | mDeletionMarker, |
---|
| 657 | mM0Marker, |
---|
| 658 | mMatchOffsetMarker |
---|
[5864] | 659 | }); |
---|
| 660 | |
---|
[6026] | 661 | Kernel * generateDepositK = mPxDriver.addKernelInstance<LZ4GenerateDepositStreamKernel>(iBuilder); |
---|
| 662 | mPxDriver.makeKernelCall(generateDepositK, {mM0Marker}, {mDepositMarker}); |
---|
[5864] | 663 | |
---|
| 664 | } |
---|
| 665 | |
---|
| 666 | std::pair<StreamSetBuffer*, StreamSetBuffer*> LZ4Generator::generateSwizzleExtractData(const std::unique_ptr<kernel::KernelBuilder> & iBuilder) { |
---|
[6066] | 667 | StreamSetBuffer * u16Swizzle0 = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(4), this->getInputBufferBlocks(iBuilder), 1); |
---|
| 668 | StreamSetBuffer * u16Swizzle1 = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(4), this->getInputBufferBlocks(iBuilder), 1); |
---|
[5864] | 669 | |
---|
[6026] | 670 | Kernel * delK = mPxDriver.addKernelInstance<SwizzledDeleteByPEXTkernel>(iBuilder, 8, 64); |
---|
| 671 | mPxDriver.makeKernelCall(delK, {mDeletionMarker, mCompressedBasisBits}, {u16Swizzle0, u16Swizzle1}); |
---|
[5864] | 672 | return std::make_pair(u16Swizzle0, u16Swizzle1); |
---|
| 673 | } |
---|
| 674 | |
---|
[6043] | 675 | void LZ4Generator::generateCompressionMarker(const std::unique_ptr<kernel::KernelBuilder> & iBuilder) { |
---|
| 676 | if (!mCompressionMarker) { |
---|
[6066] | 677 | mCompressionMarker = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 1), this->getInputBufferBlocks(iBuilder)); |
---|
[6043] | 678 | Kernel * bitstreamNotK = mPxDriver.addKernelInstance<LZ4BitStreamNotKernel>(iBuilder); |
---|
| 679 | mPxDriver.makeKernelCall(bitstreamNotK, {mDeletionMarker}, {mCompressionMarker}); |
---|
| 680 | } |
---|
| 681 | } |
---|
| 682 | |
---|
[6039] | 683 | parabix::StreamSetBuffer* LZ4Generator::generateBitStreamExtractData(const std::unique_ptr<kernel::KernelBuilder> & iBuilder) { |
---|
[6043] | 684 | this->generateCompressionMarker(iBuilder); |
---|
[6039] | 685 | |
---|
| 686 | // Deletion |
---|
[6066] | 687 | StreamSetBuffer * deletedBits = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(8), this->getInputBufferBlocks(iBuilder)); |
---|
| 688 | StreamSetBuffer * deletionCounts = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(), this->getInputBufferBlocks(iBuilder)); |
---|
[6039] | 689 | |
---|
| 690 | Kernel * delK = mPxDriver.addKernelInstance<PEXTFieldCompressKernel>(iBuilder, 64, 8); |
---|
[6043] | 691 | mPxDriver.makeKernelCall(delK, {mCompressedBasisBits, mCompressionMarker}, {deletedBits, deletionCounts}); |
---|
[6039] | 692 | |
---|
[6066] | 693 | StreamSetBuffer * compressedBits = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(8), this->getInputBufferBlocks(iBuilder)); |
---|
[6039] | 694 | Kernel * streamCompressionK = mPxDriver.addKernelInstance<StreamCompressKernel>(iBuilder, 64, 8); |
---|
| 695 | mPxDriver.makeKernelCall(streamCompressionK, {deletedBits, deletionCounts}, {compressedBits}); |
---|
| 696 | |
---|
| 697 | return compressedBits; |
---|
| 698 | } |
---|
| 699 | |
---|
[5948] | 700 | int LZ4Generator::get4MbBufferBlocks() { |
---|
[6111] | 701 | return mLz4BlockSize / codegen::BlockSize; |
---|
[5948] | 702 | } |
---|
| 703 | |
---|
[6066] | 704 | int LZ4Generator::getInputBufferBlocks(const std::unique_ptr<kernel::KernelBuilder> & b) { |
---|
| 705 | return this->get4MbBufferBlocks() * 2 * b->getBitBlockWidth() / 64; |
---|
[5864] | 706 | } |
---|
[6066] | 707 | int LZ4Generator::getDecompressedBufferBlocks(const std::unique_ptr<kernel::KernelBuilder> & b) { |
---|
| 708 | return this->get4MbBufferBlocks() * 2 * b->getBitBlockWidth() / 64; |
---|
[5864] | 709 | } |
---|
| 710 | |
---|
[6132] | 711 | LZ4BlockInfo LZ4Generator::getBlockInfo(const std::unique_ptr<kernel::KernelBuilder> & b) { |
---|
| 712 | LZ4BlockInfo blockInfo; |
---|
| 713 | blockInfo.isCompress = mPxDriver.addBuffer<StaticBuffer>(b, b->getStreamSetTy(1, 8), this->getInputBufferBlocks(b), 1); |
---|
| 714 | blockInfo.blockStart = mPxDriver.addBuffer<StaticBuffer>(b, b->getStreamSetTy(1, 64), this->getInputBufferBlocks(b), 1); |
---|
| 715 | blockInfo.blockEnd = mPxDriver.addBuffer<StaticBuffer>(b, b->getStreamSetTy(1, 64), this->getInputBufferBlocks(b), 1); |
---|
[5864] | 716 | |
---|
[6132] | 717 | Kernel * blockDecoderK = mPxDriver.addKernelInstance<LZ4BlockDecoderKernel>(b); |
---|
| 718 | blockDecoderK->setInitialArguments({b->CreateTrunc(mHasBlockChecksum, b->getInt1Ty()), mHeaderSize, mFileSize}); |
---|
| 719 | mPxDriver.makeKernelCall(blockDecoderK, {mCompressedByteStream}, {blockInfo.isCompress, blockInfo.blockStart, blockInfo.blockEnd}); |
---|
[5864] | 720 | |
---|
[6132] | 721 | return blockInfo; |
---|
| 722 | } |
---|
[5921] | 723 | |
---|
[6132] | 724 | |
---|
[5864] | 725 | // Kernel Pipeline |
---|