Ignore:
Timestamp:
May 30, 2018, 12:41:51 AM (12 months ago)
Author:
xwa163
Message:
  1. Enable swizzled match copy in multiplexing lz4_grep for some special cases
  2. Implement some lz4 AIO (all-in-one) pipelines and related kernels
File:
1 edited

Legend:

Unmodified
Added
Removed
  • icGREP/icgrep-devel/icgrep/lz4/LZ4Generator.cpp

    r6047 r6059  
    2323#include <kernels/lz4/lz4_block_decoder.h>
    2424#include <kernels/lz4/lz4_index_builder.h>
     25#include <kernels/lz4/lz4_index_builder_new.h>
     26#include <kernels/lz4/lz4_bytestream_aio.h>
     27#include <kernels/lz4/lz4_swizzled_aio.h>
    2528#include <kernels/bitstream_pdep_kernel.h>
    2629#include <kernels/lz4/lz4_bitstream_not_kernel.h>
     
    4043}
    4144
    42 void LZ4Generator::generateExtractOnlyPipeline(const std::string &outputFile) {
     45void LZ4Generator::generateNewExtractOnlyPipeline(const std::string &outputFile) {
    4346    auto & iBuilder = mPxDriver.getBuilder();
    4447    this->generateMainFunc(iBuilder);
     
    4851    // GeneratePipeline
    4952    this->generateLoadByteStreamAndBitStream(iBuilder);
    50     this->generateExtractAndDepositMarkers(iBuilder);
    51     StreamSetBuffer * const extractedBits = this->generateBitStreamExtractData(iBuilder);
    52 
    53     Kernel * p2sK = mPxDriver.addKernelInstance<P2SKernel>(iBuilder);
    54     mPxDriver.makeKernelCall(p2sK, {extractedBits}, {DecompressedByteStream});
    55 
    56     // --------------------------------------------------------
    57     // End
    58     Kernel * outK = mPxDriver.addKernelInstance<FileSink>(iBuilder, 8);
    59 
    60     outK->setInitialArguments({iBuilder->GetString(outputFile)});
    61     mPxDriver.makeKernelCall(outK, {DecompressedByteStream}, {});
    62 
    63     mPxDriver.generatePipelineIR();
    64     mPxDriver.deallocateBuffers();
    65 
    66     iBuilder->CreateRetVoid();
    67 
    68     mPxDriver.finalizeObject();
    69 }
    70 
    // Builds the swizzled "extract only" pipeline and sends its byte output to outputFile.
    //
    // Order of construction: create the Main function, load the compressed input as
    // byte and bit streams, generate the extract/deposit markers, obtain the swizzled
    // extracted data (a pair of stream sets), unswizzle it into one 8-stream set,
    // transpose that to bytes with P2SKernel, and hand the bytes to a FileSink.
    // No deposit or match-copy kernel is added in this variant.
    //
    // outputFile: passed to the FileSink kernel as its initial argument.
    71 void LZ4Generator::generateSwizzledExtractOnlyPipeline(const std::string &outputFile) {
    72     auto & iBuilder = mPxDriver.getBuilder();
    73     this->generateMainFunc(iBuilder);
    74 
    75     StreamSetBuffer * const DecompressedByteStream = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 8), this->getDecompressedBufferBlocks());
    76 
    77     // GeneratePipeline
    78     this->generateLoadByteStreamAndBitStream(iBuilder);
    79     this->generateExtractAndDepositMarkers(iBuilder);
    80     auto swizzle = this->generateSwizzleExtractData(iBuilder);
    81 
    82 
    83     // Produce unswizzled bit streams
    84     StreamSetBuffer * extractedbits = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(8), this->getInputBufferBlocks());
    // NOTE(review): the (8, 1, 2) arguments presumably mean 8 bit streams, 1 output set,
    // 2 input sets -- confirm against the SwizzleGenerator constructor.
    85     Kernel * unSwizzleK = mPxDriver.addKernelInstance<SwizzleGenerator>(iBuilder, 8, 1, 2);
    86 
    87     mPxDriver.makeKernelCall(unSwizzleK, {swizzle.first, swizzle.second}, {extractedbits});
    88 
    89 
    90     Kernel * p2sK = mPxDriver.addKernelInstance<P2SKernel>(iBuilder);
    91     mPxDriver.makeKernelCall(p2sK, {extractedbits}, {DecompressedByteStream});
    92 
    93     // --------------------------------------------------------
    94     // End
    95     Kernel * outK = mPxDriver.addKernelInstance<FileSink>(iBuilder, 8);
    96 
    97     outK->setInitialArguments({iBuilder->GetString(outputFile)});
    98     mPxDriver.makeKernelCall(outK, {DecompressedByteStream}, {});
    99 
    // All kernels are registered; emit the pipeline IR, release buffers, and close Main.
    100     mPxDriver.generatePipelineIR();
    101     mPxDriver.deallocateBuffers();
    102 
    103     iBuilder->CreateRetVoid();
    104 
    105     mPxDriver.finalizeObject();
    106 }
    107 
    // Builds the bit-stream "extract and deposit only" pipeline and sends its byte
    // output to outputFile.
    //
    // Order of construction: create the Main function, load the compressed input as
    // byte and bit streams, generate the extract/deposit markers, extract the bit
    // streams, deposit them under mDepositMarker with BitStreamPDEPKernel, transpose
    // the deposited bits to bytes with P2SKernel, and hand the bytes to a FileSink.
    // No match-copy kernel is added in this variant.
    //
    // outputFile: passed to the FileSink kernel as its initial argument.
    108 void LZ4Generator::generateExtractAndDepositOnlyPipeline(const std::string &outputFile) {
    109     auto & iBuilder = mPxDriver.getBuilder();
    110     this->generateMainFunc(iBuilder);
    111 
    112     StreamSetBuffer * const DecompressedByteStream = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 8), this->getDecompressedBufferBlocks());
    113 
    114     // GeneratePipeline
    115     this->generateLoadByteStreamAndBitStream(iBuilder);
    116     this->generateExtractAndDepositMarkers(iBuilder);
    117     StreamSetBuffer * const extractedBits = this->generateBitStreamExtractData(iBuilder);
    118 
    // Deposit stage: the output buffer is sized with getDecompressedBufferBlocks().
    119     StreamSetBuffer * depositedBits = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(8), this->getDecompressedBufferBlocks());
    120     Kernel * bitStreamPDEPk = mPxDriver.addKernelInstance<BitStreamPDEPKernel>(iBuilder, 8);
    121     mPxDriver.makeKernelCall(bitStreamPDEPk, {mDepositMarker, extractedBits}, {depositedBits});
    122 
    123     Kernel * p2sK = mPxDriver.addKernelInstance<P2SKernel>(iBuilder);
    124     mPxDriver.makeKernelCall(p2sK, {depositedBits}, {DecompressedByteStream});
    125 
    126     // --------------------------------------------------------
    127     // End
    128     Kernel * outK = mPxDriver.addKernelInstance<FileSink>(iBuilder, 8);
    129 
    130     outK->setInitialArguments({iBuilder->GetString(outputFile)});
    131     mPxDriver.makeKernelCall(outK, {DecompressedByteStream}, {});
    132 
    // All kernels are registered; emit the pipeline IR, release buffers, and close Main.
    133     mPxDriver.generatePipelineIR();
    134     mPxDriver.deallocateBuffers();
    135 
    136     iBuilder->CreateRetVoid();
    137 
    138     mPxDriver.finalizeObject();
    139 }
    140 
    // Builds the swizzled "extract and deposit only" pipeline and sends its byte
    // output to outputFile.
    //
    // Order of construction: create the Main function, load the compressed input as
    // byte and bit streams, generate the extract/deposit markers, obtain the swizzled
    // extracted data (a pair of stream sets), deposit both swizzle sets under
    // mDepositMarker with SwizzledMultiplePDEPkernel, unswizzle the result into one
    // 8-stream set, transpose to bytes with P2SKernel, and hand the bytes to a FileSink.
    // No match-copy kernel is added in this variant.
    //
    // outputFile: passed to the FileSink kernel as its initial argument.
    141 void LZ4Generator::generateSwizzledExtractAndDepositOnlyPipeline(const std::string &outputFile) {
    142     auto & iBuilder = mPxDriver.getBuilder();
    143     this->generateMainFunc(iBuilder);
    144 
    145     StreamSetBuffer * const DecompressedByteStream = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 8), this->getDecompressedBufferBlocks());
    146 
    147     // GeneratePipeline
    148     this->generateLoadByteStreamAndBitStream(iBuilder);
    149     this->generateExtractAndDepositMarkers(iBuilder);
    150 
    151     auto swizzle = this->generateSwizzleExtractData(iBuilder);
    152 
    // NOTE(review): these deposited buffers are sized with getInputBufferBlocks(), while
    // generateSwizzledPipeline sizes its depositedSwizzle0/1 with
    // getDecompressedBufferBlocks() -- confirm which size is intended here.
    153     StreamSetBuffer * depositedSwizzle0 = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(4), this->getInputBufferBlocks(), 1);
    154     StreamSetBuffer * depositedSwizzle1 = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(4), this->getInputBufferBlocks(), 1);
    155 
    156     Kernel * multiplePdepK = mPxDriver.addKernelInstance<SwizzledMultiplePDEPkernel>(iBuilder, 4, 2);
    157     mPxDriver.makeKernelCall(multiplePdepK, {mDepositMarker, swizzle.first, swizzle.second}, {depositedSwizzle0, depositedSwizzle1});
    158 
    159     // Produce unswizzled bit streams
    160     StreamSetBuffer * depositedBits = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(8), this->getInputBufferBlocks());
    161     Kernel * unSwizzleK = mPxDriver.addKernelInstance<SwizzleGenerator>(iBuilder, 8, 1, 2);
    162     mPxDriver.makeKernelCall(unSwizzleK, {depositedSwizzle0, depositedSwizzle1}, {depositedBits});
    163 
    164     Kernel * p2sK = mPxDriver.addKernelInstance<P2SKernel>(iBuilder);
    165     mPxDriver.makeKernelCall(p2sK, {depositedBits}, {DecompressedByteStream});
    166 
    167     // --------------------------------------------------------
    168     // End
    169     Kernel * outK = mPxDriver.addKernelInstance<FileSink>(iBuilder, 8);
    170     outK->setInitialArguments({iBuilder->GetString(outputFile)});
    171     mPxDriver.makeKernelCall(outK, {DecompressedByteStream}, {});
    172 
    // All kernels are registered; emit the pipeline IR, release buffers, and close Main.
    173     mPxDriver.generatePipelineIR();
    174     mPxDriver.deallocateBuffers();
    175 
    176     iBuilder->CreateRetVoid();
    177 
    178     mPxDriver.finalizeObject();
    179 }
    180 
    // Builds the full bit-stream pipeline (extract, deposit, match copy) and sends its
    // byte output to outputFile.
    //
    // Order of construction: create the Main function, load the compressed input as
    // byte and bit streams, generate the extract/deposit markers, extract the bit
    // streams, deposit them under mDepositMarker with BitStreamPDEPKernel, run
    // LZ4BitStreamMatchCopyKernel over the deposited bits, transpose the result to
    // bytes with P2SKernel, and hand the bytes to a FileSink.
    //
    // outputFile: passed to the FileSink kernel as its initial argument.
    181 void LZ4Generator::generatePipeline(const std::string &outputFile) {
    182     auto & iBuilder = mPxDriver.getBuilder();
    183     this->generateMainFunc(iBuilder);
    184 
    185     StreamSetBuffer * const DecompressedByteStream = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 8), this->getDecompressedBufferBlocks());
    186 
    187     // GeneratePipeline
    188     this->generateLoadByteStreamAndBitStream(iBuilder);
    189     this->generateExtractAndDepositMarkers(iBuilder);
    190     StreamSetBuffer * const extractedBits = this->generateBitStreamExtractData(iBuilder);
    191 
    192     StreamSetBuffer * depositedBits = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(8), this->getDecompressedBufferBlocks());
    193     Kernel * bitStreamPDEPk = mPxDriver.addKernelInstance<BitStreamPDEPKernel>(iBuilder, 8);
    194     mPxDriver.makeKernelCall(bitStreamPDEPk, {mDepositMarker, extractedBits}, {depositedBits});
    195 
    // NOTE(review): matchCopiedBits is sized with getInputBufferBlocks() although its
    // input depositedBits uses getDecompressedBufferBlocks() -- confirm this is intended.
    196     StreamSetBuffer * matchCopiedBits = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(8), this->getInputBufferBlocks());
    197     Kernel * bitStreamMatchCopyK = mPxDriver.addKernelInstance<LZ4BitStreamMatchCopyKernel>(iBuilder, 8);
    198     mPxDriver.makeKernelCall(bitStreamMatchCopyK, {mMatchOffsetMarker, mM0Marker, mCompressedByteStream, depositedBits}, {matchCopiedBits});
    199 
    200     Kernel * p2sK = mPxDriver.addKernelInstance<P2SKernel>(iBuilder);
    201     mPxDriver.makeKernelCall(p2sK, {matchCopiedBits}, {DecompressedByteStream});
    202 
    203     // --------------------------------------------------------
    204     // End
    205     Kernel * outK = mPxDriver.addKernelInstance<FileSink>(iBuilder, 8);
    206 
    207     outK->setInitialArguments({iBuilder->GetString(outputFile)});
    208     mPxDriver.makeKernelCall(outK, {DecompressedByteStream}, {});
    209 
    // All kernels are registered; emit the pipeline IR, release buffers, and close Main.
    210     mPxDriver.generatePipelineIR();
    211     mPxDriver.deallocateBuffers();
    212 
    213     iBuilder->CreateRetVoid();
    214 
    215     mPxDriver.finalizeObject();
    216 }
    217 
    // Builds the full swizzled pipeline (extract, deposit, match copy) and sends its
    // byte output to outputFile.
    //
    // Order of construction: create the Main function, load the compressed input as
    // byte and bit streams, generate the extract/deposit markers, obtain the swizzled
    // extracted data (a pair of stream sets), deposit both swizzle sets under
    // mDepositMarker with SwizzledMultiplePDEPkernel, run LZ4SwizzledMatchCopyKernel
    // over the deposited swizzles, unswizzle the result into one 8-stream set,
    // transpose to bytes with P2SKernel, and hand the bytes to a FileSink.
    //
    // outputFile: passed to the FileSink kernel as its initial argument.
    218 void LZ4Generator::generateSwizzledPipeline(const std::string &outputFile) {
    219     auto & iBuilder = mPxDriver.getBuilder();
    220     this->generateMainFunc(iBuilder);
    221 
    222     StreamSetBuffer * const DecompressedByteStream = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 8), this->getDecompressedBufferBlocks());
    223 
    224     // GeneratePipeline
    225     this->generateLoadByteStreamAndBitStream(iBuilder);
    226     this->generateExtractAndDepositMarkers(iBuilder);
    227 
    228     auto swizzle = this->generateSwizzleExtractData(iBuilder);
    229 
    // Deposit stage: two 4-stream swizzle sets, sized with getDecompressedBufferBlocks().
    230     StreamSetBuffer * depositedSwizzle0 = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(4), this->getDecompressedBufferBlocks(), 1);
    231     StreamSetBuffer * depositedSwizzle1 = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(4), this->getDecompressedBufferBlocks(), 1);
    232 
    233     Kernel * multiplePdepK = mPxDriver.addKernelInstance<SwizzledMultiplePDEPkernel>(iBuilder, 4, 2);
    234     mPxDriver.makeKernelCall(multiplePdepK, {mDepositMarker, swizzle.first, swizzle.second}, {depositedSwizzle0, depositedSwizzle1});
    235 
    // Match-copy stage: consumes the deposited swizzles plus the match-offset and M0 markers.
    236     StreamSetBuffer * matchCopiedSwizzle0 = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(4), this->getDecompressedBufferBlocks(), 1);
    237     StreamSetBuffer * matchCopiedSwizzle1 = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(4), this->getDecompressedBufferBlocks(), 1);
    238 
    239     Kernel * swizzledMatchCopyK = mPxDriver.addKernelInstance<LZ4SwizzledMatchCopyKernel>(iBuilder, 4, 2, 4);
    240     mPxDriver.makeKernelCall(swizzledMatchCopyK, {mMatchOffsetMarker, mM0Marker, mCompressedByteStream, depositedSwizzle0, depositedSwizzle1}, {matchCopiedSwizzle0, matchCopiedSwizzle1});
    241 
    242 
    243     // Produce unswizzled bit streams
    // NOTE(review): matchCopiedBits is sized with getInputBufferBlocks() although the
    // swizzled inputs use getDecompressedBufferBlocks() -- confirm this is intended.
    244     StreamSetBuffer * matchCopiedBits = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(8), this->getInputBufferBlocks());
    245     Kernel * unSwizzleK = mPxDriver.addKernelInstance<SwizzleGenerator>(iBuilder, 8, 1, 2);
    246     mPxDriver.makeKernelCall(unSwizzleK, {matchCopiedSwizzle0, matchCopiedSwizzle1}, {matchCopiedBits});
    247 
    248 
    249     Kernel * p2sK = mPxDriver.addKernelInstance<P2SKernel>(iBuilder);
    250     mPxDriver.makeKernelCall(p2sK, {matchCopiedBits}, {DecompressedByteStream});
    251 
    252     // --------------------------------------------------------
    253     // End
    254     Kernel * outK = mPxDriver.addKernelInstance<FileSink>(iBuilder, 8);
    255     outK->setInitialArguments({iBuilder->GetString(outputFile)});
    256     mPxDriver.makeKernelCall(outK, {DecompressedByteStream}, {});
    257 
    // All kernels are registered; emit the pipeline IR, release buffers, and close Main.
    258     mPxDriver.generatePipelineIR();
    259     mPxDriver.deallocateBuffers();
    260 
    261     iBuilder->CreateRetVoid();
    262 
    263     mPxDriver.finalizeObject();
    264 }
    265 
    // Declares (or fetches) the function "Main" in the builder's module and positions
    // the builder at the start of a fresh "entry" block inside it.
    //
    // Main returns void and takes, in order: an i8* input pointer, two size-typed
    // values (header size, file size), and an integer of sizeof(bool) * 8 bits
    // (block-checksum flag). The first three arguments are named and stored in
    // mInputStream, mHeaderSize and mFileSize for later pipeline stages.
    //
    // iBuilder: kernel builder whose module receives the function and whose insert
    //           point is moved into the new entry block.
    266 void LZ4Generator::generateMainFunc(const std::unique_ptr<kernel::KernelBuilder> & iBuilder) {
    267     Module * M = iBuilder->getModule();
    268     Type * const sizeTy = iBuilder->getSizeTy();
    269     Type * const boolTy = iBuilder->getIntNTy(sizeof(bool) * 8);
    270     Type * const voidTy = iBuilder->getVoidTy();
    271     Type * const inputType = iBuilder->getInt8PtrTy();
    272 
    // The trailing nullptr terminates the variadic list of parameter types.
    273     Function * const main = cast<Function>(M->getOrInsertFunction("Main", voidTy, inputType, sizeTy, sizeTy, boolTy, nullptr));
    274     main->setCallingConv(CallingConv::C);
    275     Function::arg_iterator args = main->arg_begin();
    276     mInputStream = &*(args++);
    277     mInputStream->setName("input");
    278 
    279     mHeaderSize = &*(args++);
    280     mHeaderSize->setName("mHeaderSize");
    281 
    282     mFileSize = &*(args++);
    283     mFileSize->setName("mFileSize");
    284 
    // The fourth argument is named, but the member is then replaced with a constant
    // i1 false, so the caller-supplied checksum flag is currently ignored.
    285     mHasBlockChecksum = &*(args++);
    286     mHasBlockChecksum->setName("mHasBlockChecksum");
    287     // TODO for now, we do not handle blockCheckSum
    288     mHasBlockChecksum = iBuilder->getInt1(false);
    289 
    290     iBuilder->SetInsertPoint(BasicBlock::Create(M->getContext(), "entry", main, 0));
    291 }
    292 
    // Adds the first two pipeline stages: a memory source that exposes the input as a
    // byte stream, and an S2P kernel that transposes it into 8 basis bit streams.
    //
    // Writes two members for later stages:
    //   mCompressedByteStream - ExternalBuffer of stream-set type (1, 8), produced by
    //                           MemorySourceKernel from mInputStream / mFileSize.
    //   mCompressedBasisBits  - StaticBuffer of stream-set type (8, 1) sized with
    //                           getInputBufferBlocks(), produced by S2PKernel.
    //
    // iBuilder: kernel builder used to create the buffers and kernel instances.
    293 void LZ4Generator::generateLoadByteStreamAndBitStream(const std::unique_ptr<kernel::KernelBuilder> & iBuilder) {
    294     mCompressedByteStream = mPxDriver.addBuffer<ExternalBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 8));
    295     mCompressedBasisBits = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(8, 1), this->getInputBufferBlocks());
    296 
    297     kernel::Kernel * sourceK = mPxDriver.addKernelInstance<MemorySourceKernel>(iBuilder);
    298     sourceK->setInitialArguments({mInputStream, mFileSize});
    299     mPxDriver.makeKernelCall(sourceK, {}, {mCompressedByteStream});
    300     Kernel * s2pk = mPxDriver.addKernelInstance<S2PKernel>(iBuilder, /*aligned = */ true);
    301 //    s2pk->addAttribute(MustConsumeAll());
    302     mPxDriver.makeKernelCall(s2pk, {mCompressedByteStream}, {mCompressedBasisBits});
    303 }
    304 
    305 void LZ4Generator::generateExtractAndDepositMarkers(const std::unique_ptr<kernel::KernelBuilder> & iBuilder) {
     53
    30654    //// Decode Block Information
    30755    StreamSetBuffer * const BlockData_IsCompressed = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 8), this->getInputBufferBlocks(), 1);
     
    31260    StreamSetBuffer * const Extenders = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 1), this->getInputBufferBlocks(), 1);
    31361    mMatchOffsetMarker = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 1), this->getInputBufferBlocks());
    314         // FX and XF streams will be added to IndexBuilderKernel in the future
    315 //    StreamSetBuffer * const CC_0xFX = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 1), this->getInputBufferBlocks());
    316 //    StreamSetBuffer * const CC_0xXF = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 1), this->getInputBufferBlocks());
    317 
    31862    Kernel * extenderK = mPxDriver.addKernelInstance<ParabixCharacterClassKernelBuilder>(iBuilder, "extenders", std::vector<re::CC *>{re::makeCC(0xFF)}, 8);
    319 //    extenderK->addAttribute(MustConsumeAll());
    32063    mPxDriver.makeKernelCall(extenderK, {mCompressedBasisBits}, {Extenders});
    32164
     
    32568    mPxDriver.makeKernelCall(blockDecoderK, {mCompressedByteStream}, {BlockData_IsCompressed, BlockData_BlockStart, BlockData_BlockEnd});
    32669
    327 //    re::CC* xfCC = re::makeCC(0x0f);
    328 //    re::CC* fxCC = re::makeCC(0xf0);
    329 //    for (re::codepoint_t i = 1; i <= 0xf; i++) {
    330 //        xfCC = re::makeCC(xfCC, re::makeCC(i * 0x10 + 0x0f));
    331 //        fxCC = re::makeCC(fxCC, re::makeCC(0xf0 + i));
    332 //    }
    333 
    334 //    Kernel * CC_0xFXKernel = mPxDriver.addKernelInstance<ParabixCharacterClassKernelBuilder>(iBuilder, "CC_0xFX", std::vector<re::CC *>{fxCC}, 8);
    335 //    mPxDriver.makeKernelCall(CC_0xFXKernel, {mCompressedBasisBits}, {CC_0xFX});
    336 
    337 //    Kernel * CC_0xXFKernel = mPxDriver.addKernelInstance<ParabixCharacterClassKernelBuilder>(iBuilder, "CC_0xXF", std::vector<re::CC *>{xfCC}, 8);
    338 //    mPxDriver.makeKernelCall(CC_0xXFKernel, {mCompressedBasisBits}, {CC_0xXF});
    339 
    34070    //// Generate Extract/Deposit Markers, M0_Start, M0_End, MatchOffset
    341 
    34271    //TODO handle uncompressed part
    34372    StreamSetBuffer * const UncompressedStartPos = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 64), this->getInputBufferBlocks(), 1);
     
    34574    StreamSetBuffer * const UncompressedOutputPos = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 64), this->getInputBufferBlocks(), 1);
    34675
    347     mDeletionMarker = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 1), this->getInputBufferBlocks());
     76    mCompressionMarker = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 1), this->getInputBufferBlocks());
    34877    mM0Marker = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 1), this->getDecompressedBufferBlocks());
    34978    mDepositMarker = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 1), this->getDecompressedBufferBlocks());
    35079
    351     Kernel* Lz4IndexBuilderK = mPxDriver.addKernelInstance<LZ4IndexBuilderKernel>(iBuilder);
     80    Kernel* Lz4IndexBuilderK = mPxDriver.addKernelInstance<LZ4IndexBuilderNewKernel>(iBuilder);
    35281    Lz4IndexBuilderK->setInitialArguments({mFileSize});
    35382    mPxDriver.makeKernelCall(
     
    35685                    mCompressedByteStream,
    35786                    Extenders,
    358 //                    CC_0xFX,
    359 //                    CC_0xXF,
    36087
    36188                    // Block Data
     
    36996                    UncompressedOutputPos,
    37097
     98                    mCompressionMarker,
     99                    mM0Marker,
     100                    mMatchOffsetMarker
     101            });
     102
     103    Kernel * generateDepositK = mPxDriver.addKernelInstance<LZ4GenerateDepositStreamKernel>(iBuilder);
     104    mPxDriver.makeKernelCall(generateDepositK, {mM0Marker}, {mDepositMarker});
     105
     106
     107    // Deletion
     108    StreamSetBuffer * deletedBits = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(8), this->getInputBufferBlocks());
     109    StreamSetBuffer * deletionCounts = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(), this->getInputBufferBlocks());
     110
     111    Kernel * delK = mPxDriver.addKernelInstance<PEXTFieldCompressKernel>(iBuilder, 64, 8);
     112    mPxDriver.makeKernelCall(delK, {mCompressedBasisBits, mCompressionMarker}, {deletedBits, deletionCounts});
     113
     114    StreamSetBuffer * compressedBits = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(8), this->getInputBufferBlocks());
     115    Kernel * streamCompressionK = mPxDriver.addKernelInstance<StreamCompressKernel>(iBuilder, 64, 8);
     116    mPxDriver.makeKernelCall(streamCompressionK, {deletedBits, deletionCounts}, {compressedBits});
     117
     118
     119    StreamSetBuffer * const extractedBits = compressedBits;
     120
     121    Kernel * p2sK = mPxDriver.addKernelInstance<P2SKernel>(iBuilder);
     122    mPxDriver.makeKernelCall(p2sK, {extractedBits}, {DecompressedByteStream});
     123
     124    // --------------------------------------------------------
     125    // End
     126    Kernel * outK = mPxDriver.addKernelInstance<FileSink>(iBuilder, 8);
     127
     128    outK->setInitialArguments({iBuilder->GetString(outputFile)});
     129    mPxDriver.makeKernelCall(outK, {DecompressedByteStream}, {});
     130
     131    mPxDriver.generatePipelineIR();
     132    mPxDriver.deallocateBuffers();
     133
     134    iBuilder->CreateRetVoid();
     135
     136    mPxDriver.finalizeObject();
     137}
     138
     // Builds the bit-stream "extract only" pipeline and sends its byte output to
     // outputFile.
     //
     // Order of construction: create the Main function, load the compressed input as
     // byte and bit streams, generate the extract/deposit markers, extract the bit
     // streams, transpose them to bytes with P2SKernel, and hand the bytes to a
     // FileSink. No deposit or match-copy kernel is added in this variant.
     //
     // outputFile: passed to the FileSink kernel as its initial argument.
     139void LZ4Generator::generateExtractOnlyPipeline(const std::string &outputFile) {
     140    auto & iBuilder = mPxDriver.getBuilder();
     141    this->generateMainFunc(iBuilder);
     142
     143    StreamSetBuffer * const DecompressedByteStream = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 8), this->getDecompressedBufferBlocks());
     144
     145    // GeneratePipeline
     146    this->generateLoadByteStreamAndBitStream(iBuilder);
     147    this->generateExtractAndDepositMarkers(iBuilder);
     148    StreamSetBuffer * const extractedBits = this->generateBitStreamExtractData(iBuilder);
     149
     150    Kernel * p2sK = mPxDriver.addKernelInstance<P2SKernel>(iBuilder);
     151    mPxDriver.makeKernelCall(p2sK, {extractedBits}, {DecompressedByteStream});
     152
     153    // --------------------------------------------------------
     154    // End
     155    Kernel * outK = mPxDriver.addKernelInstance<FileSink>(iBuilder, 8);
     156
     157    outK->setInitialArguments({iBuilder->GetString(outputFile)});
     158    mPxDriver.makeKernelCall(outK, {DecompressedByteStream}, {});
     159
     // All kernels are registered; emit the pipeline IR, release buffers, and close Main.
     160    mPxDriver.generatePipelineIR();
     161    mPxDriver.deallocateBuffers();
     162
     163    iBuilder->CreateRetVoid();
     164
     165    mPxDriver.finalizeObject();
     166}
     167
     // Builds the swizzled "extract only" pipeline and sends its byte output to outputFile.
     //
     // Order of construction: create the Main function, load the compressed input as
     // byte and bit streams, generate the extract/deposit markers, obtain the swizzled
     // extracted data (a pair of stream sets), unswizzle it into one 8-stream set,
     // transpose that to bytes with P2SKernel, and hand the bytes to a FileSink.
     // No deposit or match-copy kernel is added in this variant.
     //
     // outputFile: passed to the FileSink kernel as its initial argument.
     168void LZ4Generator::generateSwizzledExtractOnlyPipeline(const std::string &outputFile) {
     169    auto & iBuilder = mPxDriver.getBuilder();
     170    this->generateMainFunc(iBuilder);
     171
     172    StreamSetBuffer * const DecompressedByteStream = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 8), this->getDecompressedBufferBlocks());
     173
     174    // GeneratePipeline
     175    this->generateLoadByteStreamAndBitStream(iBuilder);
     176    this->generateExtractAndDepositMarkers(iBuilder);
     177    auto swizzle = this->generateSwizzleExtractData(iBuilder);
     178
     179
     180    // Produce unswizzled bit streams
     181    StreamSetBuffer * extractedbits = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(8), this->getInputBufferBlocks());
     // NOTE(review): the (8, 1, 2) arguments presumably mean 8 bit streams, 1 output set,
     // 2 input sets -- confirm against the SwizzleGenerator constructor.
     182    Kernel * unSwizzleK = mPxDriver.addKernelInstance<SwizzleGenerator>(iBuilder, 8, 1, 2);
     183
     184    mPxDriver.makeKernelCall(unSwizzleK, {swizzle.first, swizzle.second}, {extractedbits});
     185
     186
     187    Kernel * p2sK = mPxDriver.addKernelInstance<P2SKernel>(iBuilder);
     188    mPxDriver.makeKernelCall(p2sK, {extractedbits}, {DecompressedByteStream});
     189
     190    // --------------------------------------------------------
     191    // End
     192    Kernel * outK = mPxDriver.addKernelInstance<FileSink>(iBuilder, 8);
     193
     194    outK->setInitialArguments({iBuilder->GetString(outputFile)});
     195    mPxDriver.makeKernelCall(outK, {DecompressedByteStream}, {});
     196
     // All kernels are registered; emit the pipeline IR, release buffers, and close Main.
     197    mPxDriver.generatePipelineIR();
     198    mPxDriver.deallocateBuffers();
     199
     200    iBuilder->CreateRetVoid();
     201
     202    mPxDriver.finalizeObject();
     203}
     204
     // Builds the bit-stream "extract and deposit only" pipeline and sends its byte
     // output to outputFile.
     //
     // Order of construction: create the Main function, load the compressed input as
     // byte and bit streams, generate the extract/deposit markers, extract the bit
     // streams, deposit them under mDepositMarker with BitStreamPDEPKernel, transpose
     // the deposited bits to bytes with P2SKernel, and hand the bytes to a FileSink.
     // No match-copy kernel is added in this variant.
     //
     // outputFile: passed to the FileSink kernel as its initial argument.
     205void LZ4Generator::generateExtractAndDepositOnlyPipeline(const std::string &outputFile) {
     206    auto & iBuilder = mPxDriver.getBuilder();
     207    this->generateMainFunc(iBuilder);
     208
     209    StreamSetBuffer * const DecompressedByteStream = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 8), this->getDecompressedBufferBlocks());
     210
     211    // GeneratePipeline
     212    this->generateLoadByteStreamAndBitStream(iBuilder);
     213    this->generateExtractAndDepositMarkers(iBuilder);
     214    StreamSetBuffer * const extractedBits = this->generateBitStreamExtractData(iBuilder);
     215
     // Deposit stage: the output buffer is sized with getDecompressedBufferBlocks().
     216    StreamSetBuffer * depositedBits = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(8), this->getDecompressedBufferBlocks());
     217    Kernel * bitStreamPDEPk = mPxDriver.addKernelInstance<BitStreamPDEPKernel>(iBuilder, 8);
     218    mPxDriver.makeKernelCall(bitStreamPDEPk, {mDepositMarker, extractedBits}, {depositedBits});
     219
     220    Kernel * p2sK = mPxDriver.addKernelInstance<P2SKernel>(iBuilder);
     221    mPxDriver.makeKernelCall(p2sK, {depositedBits}, {DecompressedByteStream});
     222
     223    // --------------------------------------------------------
     224    // End
     225    Kernel * outK = mPxDriver.addKernelInstance<FileSink>(iBuilder, 8);
     226
     227    outK->setInitialArguments({iBuilder->GetString(outputFile)});
     228    mPxDriver.makeKernelCall(outK, {DecompressedByteStream}, {});
     229
     // All kernels are registered; emit the pipeline IR, release buffers, and close Main.
     230    mPxDriver.generatePipelineIR();
     231    mPxDriver.deallocateBuffers();
     232
     233    iBuilder->CreateRetVoid();
     234
     235    mPxDriver.finalizeObject();
     236}
     237
     // Builds the swizzled "extract and deposit only" pipeline and sends its byte
     // output to outputFile.
     //
     // Order of construction: create the Main function, load the compressed input as
     // byte and bit streams, generate the extract/deposit markers, obtain the swizzled
     // extracted data (a pair of stream sets), deposit both swizzle sets under
     // mDepositMarker with SwizzledMultiplePDEPkernel, unswizzle the result into one
     // 8-stream set, transpose to bytes with P2SKernel, and hand the bytes to a FileSink.
     // No match-copy kernel is added in this variant.
     //
     // outputFile: passed to the FileSink kernel as its initial argument.
     238void LZ4Generator::generateSwizzledExtractAndDepositOnlyPipeline(const std::string &outputFile) {
     239    auto & iBuilder = mPxDriver.getBuilder();
     240    this->generateMainFunc(iBuilder);
     241
     242    StreamSetBuffer * const DecompressedByteStream = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 8), this->getDecompressedBufferBlocks());
     243
     244    // GeneratePipeline
     245    this->generateLoadByteStreamAndBitStream(iBuilder);
     246    this->generateExtractAndDepositMarkers(iBuilder);
     247
     248    auto swizzle = this->generateSwizzleExtractData(iBuilder);
     249
     // NOTE(review): these deposited buffers are sized with getInputBufferBlocks(), while
     // generateSwizzledPipeline sizes its depositedSwizzle0/1 with
     // getDecompressedBufferBlocks() -- confirm which size is intended here.
     250    StreamSetBuffer * depositedSwizzle0 = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(4), this->getInputBufferBlocks(), 1);
     251    StreamSetBuffer * depositedSwizzle1 = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(4), this->getInputBufferBlocks(), 1);
     252
     253    Kernel * multiplePdepK = mPxDriver.addKernelInstance<SwizzledMultiplePDEPkernel>(iBuilder, 4, 2);
     254    mPxDriver.makeKernelCall(multiplePdepK, {mDepositMarker, swizzle.first, swizzle.second}, {depositedSwizzle0, depositedSwizzle1});
     255
     256    // Produce unswizzled bit streams
     257    StreamSetBuffer * depositedBits = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(8), this->getInputBufferBlocks());
     258    Kernel * unSwizzleK = mPxDriver.addKernelInstance<SwizzleGenerator>(iBuilder, 8, 1, 2);
     259    mPxDriver.makeKernelCall(unSwizzleK, {depositedSwizzle0, depositedSwizzle1}, {depositedBits});
     260
     261    Kernel * p2sK = mPxDriver.addKernelInstance<P2SKernel>(iBuilder);
     262    mPxDriver.makeKernelCall(p2sK, {depositedBits}, {DecompressedByteStream});
     263
     264    // --------------------------------------------------------
     265    // End
     266    Kernel * outK = mPxDriver.addKernelInstance<FileSink>(iBuilder, 8);
     267    outK->setInitialArguments({iBuilder->GetString(outputFile)});
     268    mPxDriver.makeKernelCall(outK, {DecompressedByteStream}, {});
     269
     // All kernels are registered; emit the pipeline IR, release buffers, and close Main.
     270    mPxDriver.generatePipelineIR();
     271    mPxDriver.deallocateBuffers();
     272
     273    iBuilder->CreateRetVoid();
     274
     275    mPxDriver.finalizeObject();
     276}
     277
     // Builds the full bit-stream pipeline (extract, deposit, match copy) and sends its
     // byte output to outputFile.
     //
     // Order of construction: create the Main function, load the compressed input as
     // byte and bit streams, generate the extract/deposit markers, extract the bit
     // streams, deposit them under mDepositMarker with BitStreamPDEPKernel, run
     // LZ4BitStreamMatchCopyKernel over the deposited bits, transpose the result to
     // bytes with P2SKernel, and hand the bytes to a FileSink.
     //
     // outputFile: passed to the FileSink kernel as its initial argument.
     278void LZ4Generator::generatePipeline(const std::string &outputFile) {
     279    auto & iBuilder = mPxDriver.getBuilder();
     280    this->generateMainFunc(iBuilder);
     281
     282    StreamSetBuffer * const DecompressedByteStream = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 8), this->getDecompressedBufferBlocks());
     283
     284    // GeneratePipeline
     285    this->generateLoadByteStreamAndBitStream(iBuilder);
     286    this->generateExtractAndDepositMarkers(iBuilder);
     287    StreamSetBuffer * const extractedBits = this->generateBitStreamExtractData(iBuilder);
     288
     289    StreamSetBuffer * depositedBits = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(8), this->getDecompressedBufferBlocks());
     290    Kernel * bitStreamPDEPk = mPxDriver.addKernelInstance<BitStreamPDEPKernel>(iBuilder, 8);
     291    mPxDriver.makeKernelCall(bitStreamPDEPk, {mDepositMarker, extractedBits}, {depositedBits});
     292
     // NOTE(review): matchCopiedBits is sized with getInputBufferBlocks() although its
     // input depositedBits uses getDecompressedBufferBlocks() -- confirm this is intended.
     293    StreamSetBuffer * matchCopiedBits = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(8), this->getInputBufferBlocks());
     294    Kernel * bitStreamMatchCopyK = mPxDriver.addKernelInstance<LZ4BitStreamMatchCopyKernel>(iBuilder, 8);
     295    mPxDriver.makeKernelCall(bitStreamMatchCopyK, {mMatchOffsetMarker, mM0Marker, mCompressedByteStream, depositedBits}, {matchCopiedBits});
     296
     297    Kernel * p2sK = mPxDriver.addKernelInstance<P2SKernel>(iBuilder);
     298    mPxDriver.makeKernelCall(p2sK, {matchCopiedBits}, {DecompressedByteStream});
     299
     300    // --------------------------------------------------------
     301    // End
     302    Kernel * outK = mPxDriver.addKernelInstance<FileSink>(iBuilder, 8);
     303
     304    outK->setInitialArguments({iBuilder->GetString(outputFile)});
     305    mPxDriver.makeKernelCall(outK, {DecompressedByteStream}, {});
     306
     // All kernels are registered; emit the pipeline IR, release buffers, and close Main.
     307    mPxDriver.generatePipelineIR();
     308    mPxDriver.deallocateBuffers();
     309
     310    iBuilder->CreateRetVoid();
     311
     312    mPxDriver.finalizeObject();
     313}
     314
     // Builds the full swizzled pipeline (extract, deposit, match copy) and sends its
     // byte output to outputFile.
     //
     // Order of construction: create the Main function, load the compressed input as
     // byte and bit streams, generate the extract/deposit markers, obtain the swizzled
     // extracted data (a pair of stream sets), deposit both swizzle sets under
     // mDepositMarker with SwizzledMultiplePDEPkernel, run LZ4SwizzledMatchCopyKernel
     // over the deposited swizzles, unswizzle the result into one 8-stream set,
     // transpose to bytes with P2SKernel, and hand the bytes to a FileSink.
     //
     // outputFile: passed to the FileSink kernel as its initial argument.
     315void LZ4Generator::generateSwizzledPipeline(const std::string &outputFile) {
     316    auto & iBuilder = mPxDriver.getBuilder();
     317    this->generateMainFunc(iBuilder);
     318
     319    StreamSetBuffer * const DecompressedByteStream = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 8), this->getDecompressedBufferBlocks());
     320
     321    // GeneratePipeline
     322    this->generateLoadByteStreamAndBitStream(iBuilder);
     323    this->generateExtractAndDepositMarkers(iBuilder);
     324
     325    auto swizzle = this->generateSwizzleExtractData(iBuilder);
     326
     // Deposit stage: two 4-stream swizzle sets, sized with getDecompressedBufferBlocks().
     327    StreamSetBuffer * depositedSwizzle0 = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(4), this->getDecompressedBufferBlocks(), 1);
     328    StreamSetBuffer * depositedSwizzle1 = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(4), this->getDecompressedBufferBlocks(), 1);
     329
     330    Kernel * multiplePdepK = mPxDriver.addKernelInstance<SwizzledMultiplePDEPkernel>(iBuilder, 4, 2);
     331    mPxDriver.makeKernelCall(multiplePdepK, {mDepositMarker, swizzle.first, swizzle.second}, {depositedSwizzle0, depositedSwizzle1});
     332
     // Match-copy stage: consumes the deposited swizzles plus the match-offset and M0 markers.
     333    StreamSetBuffer * matchCopiedSwizzle0 = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(4), this->getDecompressedBufferBlocks(), 1);
     334    StreamSetBuffer * matchCopiedSwizzle1 = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(4), this->getDecompressedBufferBlocks(), 1);
     335
     336    Kernel * swizzledMatchCopyK = mPxDriver.addKernelInstance<LZ4SwizzledMatchCopyKernel>(iBuilder, 4, 2, 4);
     337    mPxDriver.makeKernelCall(swizzledMatchCopyK, {mMatchOffsetMarker, mM0Marker, mCompressedByteStream, depositedSwizzle0, depositedSwizzle1}, {matchCopiedSwizzle0, matchCopiedSwizzle1});
     338
     339
     340    // Produce unswizzled bit streams
     // NOTE(review): matchCopiedBits is sized with getInputBufferBlocks() although the
     // swizzled inputs use getDecompressedBufferBlocks() -- confirm this is intended.
     341    StreamSetBuffer * matchCopiedBits = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(8), this->getInputBufferBlocks());
     342    Kernel * unSwizzleK = mPxDriver.addKernelInstance<SwizzleGenerator>(iBuilder, 8, 1, 2);
     343    mPxDriver.makeKernelCall(unSwizzleK, {matchCopiedSwizzle0, matchCopiedSwizzle1}, {matchCopiedBits});
     344
     345
     346    Kernel * p2sK = mPxDriver.addKernelInstance<P2SKernel>(iBuilder);
     347    mPxDriver.makeKernelCall(p2sK, {matchCopiedBits}, {DecompressedByteStream});
     348
     349    // --------------------------------------------------------
     350    // End
     351    Kernel * outK = mPxDriver.addKernelInstance<FileSink>(iBuilder, 8);
     352    outK->setInitialArguments({iBuilder->GetString(outputFile)});
     353    mPxDriver.makeKernelCall(outK, {DecompressedByteStream}, {});
     354
     // All kernels are registered; emit the pipeline IR, release buffers, and close Main.
     355    mPxDriver.generatePipelineIR();
     356    mPxDriver.deallocateBuffers();
     357
     358    iBuilder->CreateRetVoid();
     359
     360    mPxDriver.finalizeObject();
     361}
     362
     363void LZ4Generator::generateMainFunc(const std::unique_ptr<kernel::KernelBuilder> & iBuilder) {
     364    Module * M = iBuilder->getModule();
     365    Type * const sizeTy = iBuilder->getSizeTy();
     366    Type * const boolTy = iBuilder->getIntNTy(sizeof(bool) * 8);
     367    Type * const voidTy = iBuilder->getVoidTy();
     368    Type * const inputType = iBuilder->getInt8PtrTy();
     369
     370    Function * const main = cast<Function>(M->getOrInsertFunction("Main", voidTy, inputType, sizeTy, sizeTy, boolTy, nullptr));
     371    main->setCallingConv(CallingConv::C);
     372    Function::arg_iterator args = main->arg_begin();
     373    mInputStream = &*(args++);
     374    mInputStream->setName("input");
     375
     376    mHeaderSize = &*(args++);
     377    mHeaderSize->setName("mHeaderSize");
     378
     379    mFileSize = &*(args++);
     380    mFileSize->setName("mFileSize");
     381
     382    mHasBlockChecksum = &*(args++);
     383    mHasBlockChecksum->setName("mHasBlockChecksum");
     384    // TODO for now, we do not handle blockCheckSum
     385    mHasBlockChecksum = iBuilder->getInt1(false);
     386
     387    iBuilder->SetInsertPoint(BasicBlock::Create(M->getContext(), "entry", main, 0));
     388}
     389
     390void LZ4Generator::generateLoadByteStreamAndBitStream(const std::unique_ptr<kernel::KernelBuilder> & iBuilder) {
     391    mCompressedByteStream = mPxDriver.addBuffer<ExternalBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 8));
     392    mCompressedBasisBits = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(8, 1), this->getInputBufferBlocks());
     393
     394    kernel::Kernel * sourceK = mPxDriver.addKernelInstance<MemorySourceKernel>(iBuilder);
     395    sourceK->setInitialArguments({mInputStream, mFileSize});
     396    mPxDriver.makeKernelCall(sourceK, {}, {mCompressedByteStream});
     397    Kernel * s2pk = mPxDriver.addKernelInstance<S2PKernel>(iBuilder, /*aligned = */ true);
     398    mPxDriver.makeKernelCall(s2pk, {mCompressedByteStream}, {mCompressedBasisBits});
     399}
     400
     401StreamSetBuffer * LZ4Generator::generateSwizzledAIODecompression(const std::unique_ptr<kernel::KernelBuilder> & iBuilder) {
     402    //// Decode Block Information
     403    StreamSetBuffer * const BlockData_IsCompressed = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 8), this->getInputBufferBlocks(), 1);
     404    StreamSetBuffer * const BlockData_BlockStart = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 64), this->getInputBufferBlocks(), 1);
     405    StreamSetBuffer * const BlockData_BlockEnd = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 64), this->getInputBufferBlocks(), 1);
     406
     407    //// Generate Helper Markers Extenders, FX, XF
     408    StreamSetBuffer * const Extenders = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 1), this->getInputBufferBlocks(), 1);
     409    mMatchOffsetMarker = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 1), this->getInputBufferBlocks());
     410    Kernel * extenderK = mPxDriver.addKernelInstance<ParabixCharacterClassKernelBuilder>(iBuilder, "extenders", std::vector<re::CC *>{re::makeCC(0xFF)}, 8);
     411    mPxDriver.makeKernelCall(extenderK, {mCompressedBasisBits}, {Extenders});
     412
     413
     414    Kernel * blockDecoderK = mPxDriver.addKernelInstance<LZ4BlockDecoderNewKernel>(iBuilder);
     415    blockDecoderK->setInitialArguments({iBuilder->CreateTrunc(mHasBlockChecksum, iBuilder->getInt1Ty()), mHeaderSize, mFileSize});
     416    mPxDriver.makeKernelCall(blockDecoderK, {mCompressedByteStream}, {BlockData_IsCompressed, BlockData_BlockStart, BlockData_BlockEnd});
     417
     418
     419    // Produce unswizzled bit streams
     420    StreamSetBuffer * u16Swizzle0 = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(4), this->getInputBufferBlocks(), 1);
     421    StreamSetBuffer * u16Swizzle1 = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(4), this->getInputBufferBlocks(), 1);
     422    Kernel * unSwizzleK = mPxDriver.addKernelInstance<SwizzleGenerator>(iBuilder, 8, 2, 1, 64, "source");
     423    mPxDriver.makeKernelCall(unSwizzleK, {mCompressedBasisBits}, {u16Swizzle0, u16Swizzle1});
     424
     425
     426
     427    StreamSetBuffer * decompressedSwizzled0 = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(4), this->getInputBufferBlocks(), 1);
     428    StreamSetBuffer * decompressedSwizzled1 = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(4), this->getInputBufferBlocks(), 1);
     429
     430
     431    Kernel* lz4AioK = mPxDriver.addKernelInstance<LZ4SwizzledAioKernel>(iBuilder, 4, 2, 4);
     432    lz4AioK->setInitialArguments({mFileSize});
     433    mPxDriver.makeKernelCall(
     434            lz4AioK,
     435            {
     436                    mCompressedByteStream,
     437                    Extenders,
     438
     439                    // Block Data
     440                    BlockData_IsCompressed,
     441                    BlockData_BlockStart,
     442                    BlockData_BlockEnd,
     443
     444                    u16Swizzle0,
     445                    u16Swizzle1
     446            }, {
     447                    decompressedSwizzled0,
     448                    decompressedSwizzled1
     449            });
     450
     451
     452    StreamSetBuffer * const decompressionBitStream = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(8, 1), this->getDecompressedBufferBlocks());
     453
     454    Kernel * unSwizzleK2 = mPxDriver.addKernelInstance<SwizzleGenerator>(iBuilder, 8, 1, 2);
     455    mPxDriver.makeKernelCall(unSwizzleK2, {decompressedSwizzled0, decompressedSwizzled1}, {decompressionBitStream});
     456
     457    return decompressionBitStream;
     458}
     459
     460
     461
     462StreamSetBuffer * LZ4Generator::generateAIODecompression(const std::unique_ptr<kernel::KernelBuilder> & iBuilder) {
     463    //// Decode Block Information
     464    StreamSetBuffer * const BlockData_IsCompressed = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 8), this->getInputBufferBlocks(), 1);
     465    StreamSetBuffer * const BlockData_BlockStart = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 64), this->getInputBufferBlocks(), 1);
     466    StreamSetBuffer * const BlockData_BlockEnd = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 64), this->getInputBufferBlocks(), 1);
     467
     468    //// Generate Helper Markers Extenders, FX, XF
     469    StreamSetBuffer * const Extenders = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 1), this->getInputBufferBlocks(), 1);
     470    mMatchOffsetMarker = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 1), this->getInputBufferBlocks());
     471    Kernel * extenderK = mPxDriver.addKernelInstance<ParabixCharacterClassKernelBuilder>(iBuilder, "extenders", std::vector<re::CC *>{re::makeCC(0xFF)}, 8);
     472    mPxDriver.makeKernelCall(extenderK, {mCompressedBasisBits}, {Extenders});
     473
     474
     475    Kernel * blockDecoderK = mPxDriver.addKernelInstance<LZ4BlockDecoderNewKernel>(iBuilder);
     476    blockDecoderK->setInitialArguments({iBuilder->CreateTrunc(mHasBlockChecksum, iBuilder->getInt1Ty()), mHeaderSize, mFileSize});
     477    mPxDriver.makeKernelCall(blockDecoderK, {mCompressedByteStream}, {BlockData_IsCompressed, BlockData_BlockStart, BlockData_BlockEnd});
     478
     479
     480    StreamSetBuffer * const decompressionByteStream = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 8), this->getDecompressedBufferBlocks());
     481
     482    Kernel* lz4AioK = mPxDriver.addKernelInstance<LZ4ByteStreamAioKernel>(iBuilder);
     483    lz4AioK->setInitialArguments({mFileSize});
     484    mPxDriver.makeKernelCall(
     485            lz4AioK,
     486            {
     487                    mCompressedByteStream,
     488                    Extenders,
     489
     490                    // Block Data
     491                    BlockData_IsCompressed,
     492                    BlockData_BlockStart,
     493                    BlockData_BlockEnd
     494            }, {
     495                    decompressionByteStream
     496            });
     497
     498    return decompressionByteStream;
     499}
     500
     501void LZ4Generator::generateExtractAndDepositMarkers(const std::unique_ptr<kernel::KernelBuilder> & iBuilder) {
     502    //// Decode Block Information
     503    StreamSetBuffer * const BlockData_IsCompressed = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 8), this->getInputBufferBlocks(), 1);
     504    StreamSetBuffer * const BlockData_BlockStart = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 64), this->getInputBufferBlocks(), 1);
     505    StreamSetBuffer * const BlockData_BlockEnd = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 64), this->getInputBufferBlocks(), 1);
     506
     507    //// Generate Helper Markers Extenders, FX, XF
     508    StreamSetBuffer * const Extenders = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 1), this->getInputBufferBlocks(), 1);
     509    mMatchOffsetMarker = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 1), this->getInputBufferBlocks());
     510    Kernel * extenderK = mPxDriver.addKernelInstance<ParabixCharacterClassKernelBuilder>(iBuilder, "extenders", std::vector<re::CC *>{re::makeCC(0xFF)}, 8);
     511    mPxDriver.makeKernelCall(extenderK, {mCompressedBasisBits}, {Extenders});
     512
     513
     514    Kernel * blockDecoderK = mPxDriver.addKernelInstance<LZ4BlockDecoderNewKernel>(iBuilder);
     515    blockDecoderK->setInitialArguments({iBuilder->CreateTrunc(mHasBlockChecksum, iBuilder->getInt1Ty()), mHeaderSize, mFileSize});
     516    mPxDriver.makeKernelCall(blockDecoderK, {mCompressedByteStream}, {BlockData_IsCompressed, BlockData_BlockStart, BlockData_BlockEnd});
     517
     518    //// Generate Extract/Deposit Markers, M0_Start, M0_End, MatchOffset
     519
     520    //TODO handle uncompressed part
     521    StreamSetBuffer * const UncompressedStartPos = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 64), this->getInputBufferBlocks(), 1);
     522    StreamSetBuffer * const UncompressedLength = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 64), this->getInputBufferBlocks(), 1);
     523    StreamSetBuffer * const UncompressedOutputPos = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 64), this->getInputBufferBlocks(), 1);
     524
     525    mDeletionMarker = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 1), this->getInputBufferBlocks());
     526    mM0Marker = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 1), this->getDecompressedBufferBlocks());
     527    mDepositMarker = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 1), this->getDecompressedBufferBlocks());
     528
     529    Kernel* Lz4IndexBuilderK = mPxDriver.addKernelInstance<LZ4IndexBuilderKernel>(iBuilder);
     530    Lz4IndexBuilderK->setInitialArguments({mFileSize});
     531    mPxDriver.makeKernelCall(
     532            Lz4IndexBuilderK,
     533            {
     534                    mCompressedByteStream,
     535                    Extenders,
     536
     537                    // Block Data
     538                    BlockData_IsCompressed,
     539                    BlockData_BlockStart,
     540                    BlockData_BlockEnd
     541            }, {
     542                    //Uncompressed Data
     543                    UncompressedStartPos,
     544                    UncompressedLength,
     545                    UncompressedOutputPos,
     546
    371547                    mDeletionMarker,
    372548                    mM0Marker,
Note: See TracChangeset for help on using the changeset viewer.