Changeset 5906 for icGREP/icgrep-devel


Ignore:
Timestamp:
Mar 13, 2018, 12:09:23 PM (13 months ago)
Author:
xwa163
Message:

Implement a swizzled match copy kernel, which performs match copy directly on bitstreams in swizzled form

Location:
icGREP/icgrep-devel/icgrep
Files:
2 added
2 edited

Legend:

Unmodified
Added
Removed
  • icGREP/icgrep-devel/icgrep/CMakeLists.txt

    r5893 r5906  
    123123add_executable(character_deletion character_deletion.cpp kernels/cc_kernel.cpp)
    124124add_executable(character_deposit character_deposit.cpp kernels/cc_kernel.cpp kernels/pdep_kernel.cpp kernels/lz4/lz4_multiple_pdep_kernel.cpp)
    125 add_executable(lz4d_ext_dep lz4d_ext_dep.cpp lz4FrameDecoder.cpp kernels/cc_kernel.cpp kernels/lz4/lz4_block_decoder.cpp kernels/sequential_kernel.cpp kernels/lz4/lz4_extract_e_m0.cpp kernels/lz4/lz4_deposit_uncompressed.cpp kernels/lz4/lz4_generate_deposit_stream.cpp kernels/lz4/lz4_numbers_to_bitstream_kernel.cpp kernels/lz4/LZ4MarkerToMaskKernel.cpp kernels/lz4/lz4_bitstream_not_kernel.cpp kernels/pdep_kernel.cpp kernels/lz4/lz4_match_copy_kernel.cpp lz4/LZ4Generator.cpp kernels/lz4/lz4_multiple_pdep_kernel.cpp)
     125add_executable(lz4d_ext_dep lz4d_ext_dep.cpp lz4FrameDecoder.cpp kernels/cc_kernel.cpp kernels/lz4/lz4_block_decoder.cpp kernels/sequential_kernel.cpp kernels/lz4/lz4_extract_e_m0.cpp kernels/lz4/lz4_deposit_uncompressed.cpp kernels/lz4/lz4_generate_deposit_stream.cpp kernels/lz4/lz4_numbers_to_bitstream_kernel.cpp kernels/lz4/LZ4MarkerToMaskKernel.cpp kernels/lz4/lz4_bitstream_not_kernel.cpp kernels/pdep_kernel.cpp kernels/lz4/lz4_match_copy_kernel.cpp lz4/LZ4Generator.cpp kernels/lz4/lz4_multiple_pdep_kernel.cpp kernels/lz4/lz4_swizzled_match_copy_kernel.cpp)
    126126
    127127#Exclude CoRE from default build.
  • icGREP/icgrep-devel/icgrep/lz4/LZ4Generator.cpp

    r5895 r5906  
    2929#include <kernels/lz4/lz4_multiple_pdep_kernel.h>
    3030#include <kernels/lz4/lz4_match_copy_kernel.h>
     31#include <kernels/lz4/lz4_swizzled_match_copy_kernel.h>
    3132
    3233namespace re { class CC; }
     
    137138
    138139    StreamSetBuffer * const DecompressedByteStream = pxDriver.addBuffer<CircularBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 8), this->getDecompressedBufferBlocks());
    139     StreamSetBuffer * const FinalDecompressedByteStream = pxDriver.addBuffer<CircularBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 8), this->getDecompressedBufferBlocks());
    140 
     140//    StreamSetBuffer * const FinalDecompressedByteStream = pxDriver.addBuffer<CircularBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 8), this->getDecompressedBufferBlocks());
    141141
    142142
     
    147147    auto swizzle = this->generateSwizzleExtractData(iBuilder);
    148148
     149    //TODO buffer blocks should be decompressedBufferBlocks
    149150    StreamSetBuffer * depositedSwizzle0 = pxDriver.addBuffer<SwizzledCopybackBuffer>(iBuilder, iBuilder->getStreamSetTy(4), this->getInputBufferBlocks(), 1);
    150151    StreamSetBuffer * depositedSwizzle1 = pxDriver.addBuffer<SwizzledCopybackBuffer>(iBuilder, iBuilder->getStreamSetTy(4), this->getInputBufferBlocks(), 1);
     
    152153    Kernel * multiplePdepK = pxDriver.addKernelInstance<LZ4MultiplePDEPkernel>(iBuilder, 4, 2, 4);
    153154    pxDriver.makeKernelCall(multiplePdepK, {DepositMarker, swizzle.first, swizzle.second}, {depositedSwizzle0, depositedSwizzle1});
     155
     156
     157    StreamSetBuffer * matchCopiedSwizzle0 = pxDriver.addBuffer<SwizzledCopybackBuffer>(iBuilder, iBuilder->getStreamSetTy(4), this->getInputBufferBlocks(), 1);
     158    StreamSetBuffer * matchCopiedSwizzle1 = pxDriver.addBuffer<SwizzledCopybackBuffer>(iBuilder, iBuilder->getStreamSetTy(4), this->getInputBufferBlocks(), 1);
     159
     160    Kernel * swizzledMatchCopyK = pxDriver.addKernelInstance<LZ4SwizzledMatchCopyKernel>(iBuilder, 4, 2, 4);
     161    pxDriver.makeKernelCall(swizzledMatchCopyK, {M0_Start, M0_End, Match_Offset, depositedSwizzle0, depositedSwizzle1}, {matchCopiedSwizzle0, matchCopiedSwizzle1});
     162
    154163
    155164    // Produce unswizzled bit streams
    156165    StreamSetBuffer * extractedbits = pxDriver.addBuffer<CircularBuffer>(iBuilder, iBuilder->getStreamSetTy(8), this->getInputBufferBlocks());
    157166    Kernel * unSwizzleK = pxDriver.addKernelInstance<SwizzleGenerator>(iBuilder, 8, 1, 2);
    158     pxDriver.makeKernelCall(unSwizzleK, {depositedSwizzle0, depositedSwizzle1}, {extractedbits});
     167    pxDriver.makeKernelCall(unSwizzleK, {matchCopiedSwizzle0, matchCopiedSwizzle1}, {extractedbits});
     168//    pxDriver.makeKernelCall(unSwizzleK, {depositedSwizzle0, depositedSwizzle1}, {extractedbits});
    159169
    160170//    pxDriver.makeKernelCall(unSwizzleK, {u16Swizzle0, u16Swizzle1}, {extractedbits});
     
    165175    pxDriver.makeKernelCall(p2sK, {extractedbits}, {DecompressedByteStream});
    166176
    167     Kernel * matchCopyK = pxDriver.addKernelInstance<LZ4MatchCopyKernel>(iBuilder);
    168     pxDriver.makeKernelCall(matchCopyK, {DecompressedByteStream, M0_Start, M0_End, Match_Offset}, {FinalDecompressedByteStream});
     177//    Kernel * matchCopyK = pxDriver.addKernelInstance<LZ4MatchCopyKernel>(iBuilder);
     178//    pxDriver.makeKernelCall(matchCopyK, {DecompressedByteStream, M0_Start, M0_End, Match_Offset}, {FinalDecompressedByteStream});
    169179
    170180    // --------------------------------------------------------
     
    172182    Kernel * outK = pxDriver.addKernelInstance<FileSink>(iBuilder, 8);
    173183    outK->setInitialArguments({iBuilder->GetString(outputFile)});
    174     pxDriver.makeKernelCall(outK, {FinalDecompressedByteStream}, {});
     184    pxDriver.makeKernelCall(outK, {DecompressedByteStream}, {});
     185//    pxDriver.makeKernelCall(outK, {FinalDecompressedByteStream}, {});
    175186
    176187    pxDriver.generatePipelineIR();
     
    341352
    342353int LZ4Generator::getDecompressedBufferBlocks() {
    343     const unsigned decompressBufBlocks = 256U * 256U / codegen::BlockSize * 2 * 2; // TODO at least *2 since we need to leave 1 for match copy window
     354    const unsigned copyBackWindowBlocks = 256U * 256U / codegen::BlockSize;
     355    // At least * 2, since one window must be kept as the source of match copy
     356    // while the other window serves as the destination for match copy
     357    const unsigned decompressBufBlocks = copyBackWindowBlocks * 2;
    344358    return decompressBufBlocks;
    345359}
Note: See TracChangeset for help on using the changeset viewer.