Ignore:
Timestamp:
Jun 24, 2018, 1:24:36 AM (10 months ago)
Author:
xwa163
Message:
  1. Clean up LZ4 AIO-related kernels
  2. Improve LZ4ParallelByteStreamAIOKernel
  3. Implement simd_cttz
File:
1 edited

Legend:

Unmodified
Added
Removed
  • icGREP/icgrep-devel/icgrep/lz4/LZ4GrepGenerator.cpp

    r6089 r6111  
    66#include <llvm/Support/PrettyStackTrace.h>
    77
    8 #include <cc/alphabet.h>
    98#include <cc/cc_compiler.h>
    109
     
    2322#include <kernels/lz4/lz4_bitstream_match_copy_kernel.h>
    2423#include <kernels/lz4/lz4_bitstream_not_kernel.h>
    25 #include <kernels/lz4/lz4_fake_stream_generating_kernel.h>
     24#include <kernels/fake_stream_generating_kernel.h>
    2625#include <kernels/bitstream_pdep_kernel.h>
    2726#include <kernels/bitstream_gather_pdep_kernel.h>
     
    5352#include <llvm/Support/Debug.h>
    5453#include <kernels/lz4/lz4_block_decoder.h>
    55 #include <kernels/lz4/lz4_swizzled_aio.h>
     54#include <kernels/lz4/aio/lz4_swizzled_aio.h>
    5655
    5756
     
    124123
    125124
    126     Kernel * blockDecoderK = mPxDriver.addKernelInstance<LZ4BlockDecoderNewKernel>(iBuilder);
     125    Kernel * blockDecoderK = mPxDriver.addKernelInstance<LZ4BlockDecoderKernel>(iBuilder);
    127126    blockDecoderK->setInitialArguments({iBuilder->CreateTrunc(mHasBlockChecksum, iBuilder->getInt1Ty()), mHeaderSize, mFileSize});
    128127    mPxDriver.makeKernelCall(blockDecoderK, {mCompressedByteStream}, {BlockData_IsCompressed, BlockData_BlockStart, BlockData_BlockEnd});
     
    281280
    282281    StreamSetBuffer * fakeMatchCopiedBits = mPxDriver.addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(8), this->getInputBufferBlocks(idb));
    283     Kernel* fakeStreamGeneratorK = mPxDriver.addKernelInstance<LZ4FakeStreamGeneratingKernel>(idb, numOfCharacterClasses, 8);
     282    Kernel* fakeStreamGeneratorK = mPxDriver.addKernelInstance<FakeStreamGeneratingKernel>(idb, numOfCharacterClasses, 8);
    284283    mPxDriver.makeKernelCall(fakeStreamGeneratorK, {decompressedCharClasses}, {fakeMatchCopiedBits});
    285284
     
    372371
    373372    StreamSetBuffer * fakeMatchCopiedBits = mPxDriver.addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(8), this->getInputBufferBlocks(idb));
    374     Kernel* fakeStreamGeneratorK = mPxDriver.addKernelInstance<LZ4FakeStreamGeneratingKernel>(idb, numOfCharacterClasses, 8);
     373    Kernel* fakeStreamGeneratorK = mPxDriver.addKernelInstance<FakeStreamGeneratingKernel>(idb, numOfCharacterClasses, 8);
    375374    mPxDriver.makeKernelCall(fakeStreamGeneratorK, {decompressedCharClasses}, {fakeMatchCopiedBits});
    376375
     
    523522}
    524523
    525 void LZ4GrepGenerator::generateMultiplexingSwizzledAioPipeline(re::RE* regex) {
    526     auto & iBuilder = mPxDriver.getBuilder();
    527     this->generateMainFunc(iBuilder);
    528 
    529     // GeneratePipeline
    530     this->generateLoadByteStreamAndBitStream(iBuilder);
    531 
    532     std::vector<re::RE*> res = {regex};
    533     this->generateMultiplexingCompressedBitStream(res);
    534 
    535     mPxDriver.generatePipelineIR();
    536     mPxDriver.deallocateBuffers();
    537 
    538     iBuilder->CreateRetVoid();
    539 
    540     mPxDriver.finalizeObject();
    541 }
    542 
    543 void LZ4GrepGenerator::generateMultiplexingSwizzledAioPipeline2(re::RE* regex) {
     524void LZ4GrepGenerator::generateMultiplexingSwizzledAioPipeline(re::RE *regex) {
    544525    auto & iBuilder = mPxDriver.getBuilder();
    545526    this->generateCountOnlyMainFunc(iBuilder);
     
    607588}
    608589
    609 void LZ4GrepGenerator::generateParallelAioPipeline(re::RE* regex, bool enableGather, bool enableScatter) {
     590void LZ4GrepGenerator::generateParallelAioPipeline(re::RE* regex, bool enableGather, bool enableScatter, int minParallelLevel) {
    610591    auto & iBuilder = mPxDriver.getBuilder();
    611592    this->generateCountOnlyMainFunc(iBuilder);
    612593
    613594    this->generateLoadByteStream(iBuilder);
    614     parabix::StreamSetBuffer * decompressedByteStream = this->generateParallelAIODecompression(iBuilder, enableGather, enableScatter);
     595    parabix::StreamSetBuffer * decompressedByteStream = this->generateParallelAIODecompression(iBuilder, enableGather, enableScatter, minParallelLevel);
    615596
    616597
    617598    StreamSetBuffer * const decompressionBitStream = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(8, 1), this->getDecompressedBufferBlocks(iBuilder));
    618     Kernel * s2pk = mPxDriver.addKernelInstance<S2PKernel>(iBuilder, cc::BitNumbering::BigEndian, /*aligned = */ true, "a");
    619 //    Kernel * s2pk = mPxDriver.addKernelInstance<S2PByPextKernel>(iBuilder, cc::BitNumbering::BigEndian, "a");
     599    Kernel * s2pk = mPxDriver.addKernelInstance<S2PKernel>(iBuilder, cc::BitNumbering::LittleEndian, /*aligned = */ true, "a");
     600//    Kernel * s2pk = mPxDriver.addKernelInstance<S2PByPextKernel>(iBuilder, cc::BitNumbering::LittleEndian, "a");
    620601    mPxDriver.makeKernelCall(s2pk, {decompressedByteStream}, {decompressionBitStream});
    621602
     
    651632
    652633    // GeneratePipeline
     634    this->generateLoadByteStream(iBuilder);
    653635//    this->generateLoadByteStreamAndBitStream(iBuilder);
    654     this->generateLoadByteStream(iBuilder);
     636
    655637    parabix::StreamSetBuffer * decompressedByteStream = this->generateAIODecompression(iBuilder);
    656638
    657639
    658640    StreamSetBuffer * const decompressionBitStream = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(8, 1), this->getDecompressedBufferBlocks(iBuilder));
    659     Kernel * s2pk = mPxDriver.addKernelInstance<S2PKernel>(iBuilder, cc::BitNumbering::BigEndian, /*aligned = */ true, "a");
     641    Kernel * s2pk = mPxDriver.addKernelInstance<S2PKernel>(iBuilder, cc::BitNumbering::LittleEndian, /*aligned = */ true, "a");
    660642//    Kernel * s2pk = mPxDriver.addKernelInstance<S2PByPextKernel>(iBuilder, "a");
    661643    mPxDriver.makeKernelCall(s2pk, {decompressedByteStream}, {decompressionBitStream});
Note: See TracChangeset for help on using the changeset viewer.