Changeset 6150


Ignore:
Timestamp:
Aug 27, 2018, 2:22:39 PM (3 months ago)
Author:
xwa163
Message:
  1. Remove LZParabix-related code
  2. Enable multiplexing for the LZ4 ScanMatch pipeline
  3. Some minor bug fixes
Location:
icGREP/icgrep-devel/icgrep
Files:
2 added
2 deleted
14 edited

Legend:

Unmodified
Added
Removed
  • icGREP/icgrep-devel/icgrep/CMakeLists.txt

    r6143 r6150  
    103103add_library(UCDlib UCD/CaseFolding.cpp utf8_encoder.cpp utf16_encoder.cpp UCD/ucd_compiler.cpp UCD/PropertyObjects.cpp UCD/resolve_properties.cpp)
    104104add_library(GrepEngine  ${GREP_CORE_SRC} grep/grep_engine.cpp kernels/cc_kernel.cpp kernels/cc_scan_kernel.cpp kernels/charclasses.cpp kernels/streams_merge.cpp kernels/until_n.cpp kernels/UCD_property_kernel.cpp kernels/grapheme_kernel.cpp)
    105 add_library(LZ4_Lib lz4/lz4_frame_decoder.cpp kernels/cc_kernel.cpp kernels/pdep_kernel.cpp lz4/lz4_decompression_generator.cpp kernels/lz4/lz4_block_decoder.cpp kernels/bitstream_pdep_kernel.cpp kernels/bitstream_gather_pdep_kernel.cpp kernels/swizzled_multiple_pdep_kernel.cpp kernels/fake_stream_generating_kernel.cpp kernels/lz4/decompression/lz4_bytestream_decompression.cpp kernels/lz4/decompression/lz4_swizzled_decompression.cpp kernels/lz4/decompression/lz4_sequential_decompression_base.cpp kernels/lz4/decompression/lz4_bitstream_decompression.cpp kernels/lz4/decompression/lz4_twist_decompression.cpp kernels/lz4/twist_kernel.cpp kernels/lz4/untwist_kernel.cpp lz4/lz4_base_generator.cpp)
    106 add_library(LZParabix_Lib lzparabix/LZParabixGenerator.cpp kernels/lzparabix/decoder/LZParabixBlockDecoder.cpp kernels/lzparabix/decoder/LZParabixAioBaseKernel.cpp lzparabix/LZParabixGrepGenerator.cpp kernels/fake_stream_generating_kernel.cpp kernels/lzparabix/encoder/LZParabixCompressionKernel.cpp kernels/lzparabix/decoder/LZParabixLiteralDecoderKernel.cpp kernels/lzparabix/decoder/LZParabixBitStreamAioKernel.cpp kernels/lzparabix/decoder/LZParabixSwizzledAioKernel.cpp)
     105add_library(LZ4_Lib lz4/lz4_frame_decoder.cpp kernels/cc_kernel.cpp kernels/pdep_kernel.cpp lz4/lz4_decompression_generator.cpp kernels/lz4/lz4_block_decoder.cpp kernels/bitstream_pdep_kernel.cpp kernels/bitstream_gather_pdep_kernel.cpp kernels/swizzled_multiple_pdep_kernel.cpp kernels/fake_stream_generating_kernel.cpp kernels/lz4/decompression/lz4_bytestream_decompression.cpp kernels/lz4/decompression/lz4_swizzled_decompression.cpp kernels/lz4/decompression/lz4_sequential_decompression_base.cpp kernels/lz4/decompression/lz4_bitstream_decompression.cpp kernels/lz4/decompression/lz4_twist_decompression.cpp kernels/lz4/twist_kernel.cpp kernels/lz4/untwist_kernel.cpp lz4/lz4_base_generator.cpp kernels/lz4/lz4_match_detector.cpp kernels/lz4/lz4_match_detector.h)
    107106
    108107# force the compiler to compile the object cache to ensure that the versioning information is up to date
     
    131130add_executable(lz4_decoder lz4_decoder.cpp)
    132131add_executable(lz4_grep grep_interface.cpp util/file_select.cpp lz4_grep.cpp lz4/grep/lz4_grep_base_generator.cpp lz4/grep/lz4_grep_bytestream_generator.cpp lz4/grep/lz4_grep_bytestream_generator.h lz4/grep/lz4_grep_bitstream_generator.cpp lz4/grep/lz4_grep_bitstream_generator.h lz4/grep/lz4_grep_swizzle_generator.cpp lz4/grep/lz4_grep_swizzle_generator.h)
    133 add_executable(lzparabix_decoder lzparabix_decoder.cpp)
    134 add_executable(lzparabix_grep lzparabix_grep.cpp grep_interface.cpp util/file_select.cpp)
    135 add_executable(lzparabix_compressor lzparabix_compressor.cpp lzparabix/LZParabixCompressorGenerator.cpp)
    136132add_executable(idisa_test idisa_test.cpp)
    137133
     
    151147target_link_libraries (character_deposit PabloADT RegExpCompiler CCADT CodeGen ${REQ_LLVM_LIBRARIES} ${Boost_LIBRARIES} ${CUDA_LIB})
    152148target_link_libraries (lz4_decoder LZ4_Lib PabloADT CCADT CodeGen ${REQ_LLVM_LIBRARIES} ${Boost_LIBRARIES} ${CUDA_LIB})
    153 target_link_libraries (lz4_grep LZ4_Lib GrepEngine UCDlib PabloADT RegExpCompiler CodeGen CCADT CodeGen ${REQ_LLVM_LIBRARIES} ${Boost_LIBRARIES} ${CUDA_LIB})
    154 target_link_libraries (lzparabix_decoder LZParabix_Lib PabloADT CCADT CodeGen ${REQ_LLVM_LIBRARIES} ${Boost_LIBRARIES} ${CUDA_LIB})
    155 target_link_libraries (lzparabix_grep LZParabix_Lib GrepEngine UCDlib PabloADT RegExpCompiler CodeGen CCADT CodeGen ${REQ_LLVM_LIBRARIES} ${Boost_LIBRARIES} ${CUDA_LIB})
    156 target_link_libraries (lzparabix_compressor LZParabix_Lib PabloADT CCADT CodeGen ${REQ_LLVM_LIBRARIES} ${Boost_LIBRARIES} ${CUDA_LIB})
     149target_link_libraries (lz4_grep LZ4_Lib UCDlib PabloADT RegExpCompiler CodeGen CCADT CodeGen ${REQ_LLVM_LIBRARIES} ${Boost_LIBRARIES} ${CUDA_LIB})
    157150target_link_libraries (idisa_test CodeGen PabloADT ${REQ_LLVM_LIBRARIES} ${Boost_LIBRARIES} ${CUDA_LIB})
    158151
  • icGREP/icgrep-devel/icgrep/kernels/lz4/decompression/lz4_bytestream_decompression.cpp

    r6148 r6150  
    1919    }
    2020
    21     LZ4ByteStreamDecompressionKernel::LZ4ByteStreamDecompressionKernel(const std::unique_ptr<kernel::KernelBuilder> &b, bool copyOtherByteStream, unsigned blockSize)
    22             : LZ4SequentialDecompressionKernel(b, "LZ4ByteStreamDecompressionKernel", blockSize),
     21    LZ4ByteStreamDecompressionKernel::LZ4ByteStreamDecompressionKernel(const std::unique_ptr<kernel::KernelBuilder> &b, bool copyOtherByteStream, unsigned blockSize, bool conditionalDecompression)
     22            : LZ4SequentialDecompressionKernel(b, "LZ4ByteStreamDecompressionKernel", blockSize, conditionalDecompression),
    2323              mCopyOtherByteStream(copyOtherByteStream) {
    2424        mStreamSetOutputs.push_back(Binding{b->getStreamSetTy(1, 8), "outputStream", BoundedRate(0, 1)});
  • icGREP/icgrep-devel/icgrep/kernels/lz4/decompression/lz4_bytestream_decompression.h

    r6148 r6150  
    99    class LZ4ByteStreamDecompressionKernel : public LZ4SequentialDecompressionKernel {
    1010    public:
    11         LZ4ByteStreamDecompressionKernel(const std::unique_ptr<kernel::KernelBuilder> &b, bool copyOtherByteStream = false, unsigned blockSize = 4 * 1024 * 1024);
     11        LZ4ByteStreamDecompressionKernel(const std::unique_ptr<kernel::KernelBuilder> &b, bool copyOtherByteStream = false, unsigned blockSize = 4 * 1024 * 1024, bool conditionalDecompression = false);
    1212
    1313
  • icGREP/icgrep-devel/icgrep/kernels/lz4/decompression/lz4_sequential_decompression_base.cpp

    r6136 r6150  
    1313
    1414namespace kernel{
    15     LZ4SequentialDecompressionKernel::LZ4SequentialDecompressionKernel(const std::unique_ptr<kernel::KernelBuilder> &b, std::string&& kernelName, unsigned blockSize)
     15    LZ4SequentialDecompressionKernel::LZ4SequentialDecompressionKernel(const std::unique_ptr<kernel::KernelBuilder> &b, std::string&& kernelName, unsigned blockSize, bool conditionalDecompression)
    1616            :SegmentOrientedKernel(std::move(kernelName),
    1717            // Inputs
    1818                                   {
    19                     Binding{b->getStreamSetTy(1, 8), "byteStream", BoundedRate(0, 1)},
     19                    Binding{b->getStreamSetTy(1, 8), "byteStream", BoundedRate(0, 1), AlwaysConsume()},
    2020//                    Binding{b->getStreamSetTy(1, 1), "extender", RateEqualTo("byteStream")},
    2121
    2222                    // block data
    23                     Binding{b->getStreamSetTy(1, 1), "isCompressed", BoundedRate(0, 1), AlwaysConsume()},
     23                    Binding{b->getStreamSetTy(1, 8), "isCompressed", BoundedRate(0, 1), AlwaysConsume()},
    2424                    Binding{b->getStreamSetTy(1, 64), "blockStart", RateEqualTo("isCompressed"), AlwaysConsume()},
    2525                    Binding{b->getStreamSetTy(1, 64), "blockEnd", RateEqualTo("isCompressed"), AlwaysConsume()}
     
    4141                                           Binding{b->getInt1Ty(), "hasCallInitialization"}
    4242                                   }),
    43              mBlockSize(blockSize) {
     43             mBlockSize(blockSize),
     44             mConditionalDecompression(conditionalDecompression)
     45    {
     46        if (conditionalDecompression) {
     47            mStreamSetInputs.push_back(Binding{b->getStreamSetTy(1, 1), "matches", BoundedRate(0, 1)});
     48        }
     49
     50
    4451        this->setStride(blockSize);
    4552        addAttribute(MustExplicitlyTerminate());
     53
    4654    }
    4755
     
    8492
    8593        b->SetInsertPoint(processBlock);
    86         //TODO handle uncompressed block
     94
     95        Value* isTerminal = b->CreateICmpUGE(blockEnd, b->getScalarField("fileSize"));
     96        b->setTerminationSignal(isTerminal);
     97
     98
     99        BasicBlock* actualProcessBlock = b->CreateBasicBlock("actualProcessBlock");
     100        BasicBlock* skipProcessBlock = b->CreateBasicBlock("actualProcessBlock");
     101        BasicBlock* processFinishBlock = b->CreateBasicBlock("processFinishBlock");
     102
     103        Value* shouldActuallyProcess = nullptr;
     104        if(mConditionalDecompression) {
     105
     106            Value* availableMatch = b->getAvailableItemCount("matches");
     107            Value* matchesStart = b->getProcessedItemCount("matches");
     108            Value* matchesEnd = b->CreateAdd(matchesStart, b->getSize(mBlockSize));
     109            matchesEnd = b->CreateUMin(matchesEnd, availableMatch);
     110
     111            Value* hasMatch = this->detectMatch(b, matchesStart, matchesEnd);
     112            shouldActuallyProcess = hasMatch;
     113        } else {
     114            shouldActuallyProcess = b->getInt1(true);
     115        }
     116        b->CreateCondBr(shouldActuallyProcess, actualProcessBlock, skipProcessBlock);
     117
     118        // ---- actualProcessBlock
     119        b->SetInsertPoint(actualProcessBlock);
     120
    87121        this->prepareProcessBlock(b, blockStart, blockEnd);
    88 
    89122        this->processCompressedLz4Block(b, blockStart, blockEnd);
    90 
    91123        this->storePendingOutput(b);
    92 
    93 //        this->storePendingM0(b);
    94 //        this->storePendingLiteralMask(b);
    95 //        this->storePendingMatchOffsetMarker(b);
     124        b->CreateBr(processFinishBlock);
     125
     126        // ---- skipProcessBlock
     127        b->SetInsertPoint(skipProcessBlock);
     128
     129        Value* oldOutputPos = b->getScalarField("outputPos");
     130        Value* newOutputPos = b->CreateAdd(oldOutputPos, b->getSize(mBlockSize));
     131        newOutputPos = b->CreateUMin(newOutputPos, b->getAvailableItemCount("matches"));
     132        b->setScalarField("outputPos", newOutputPos);
     133
     134        b->CreateBr(processFinishBlock);
     135
     136        // ---- processFinishBlock
     137
     138        b->SetInsertPoint(processFinishBlock);
    96139        Value * newBlockDataIndex = b->CreateAdd(blockDataIndex, b->getInt64(1));
    97140        b->setScalarField("blockDataIndex", newBlockDataIndex);
     
    99142        b->setProcessedItemCount("byteStream", blockEnd);
    100143        this->setProducedOutputItemCount(b, b->getScalarField("outputPos"));
     144        if (mConditionalDecompression) {
     145            b->setProcessedItemCount("matches", b->getScalarField("outputPos"));
     146        }
     147
    101148        b->CreateBr(exitBlock);
    102149
     
    123170    LZ4SequentialDecompressionKernel::processCompressedLz4Block(const std::unique_ptr<KernelBuilder> &b, llvm::Value *lz4BlockStart,
    124171                                                llvm::Value *lz4BlockEnd) {
    125         Value* isTerminal = b->CreateICmpEQ(lz4BlockEnd, b->getScalarField("fileSize"));
    126         b->setTerminationSignal(isTerminal);
     172
    127173
    128174        BasicBlock* exitBlock = b->CreateBasicBlock("processCompressedExitBlock");
     
    608654
    609655    // ---- Basic Function
     656    llvm::Value *
     657    LZ4SequentialDecompressionKernel::generateLoadInt8NumberInput(const std::unique_ptr<KernelBuilder> &iBuilder, std::string inputBufferName,
     658                                llvm::Value *globalOffset) {
     659        Value * capacity = iBuilder->getCapacity(inputBufferName);
     660        Value * processed = iBuilder->getProcessedItemCount(inputBufferName);
     661        processed = iBuilder->CreateAnd(processed, iBuilder->CreateNeg(capacity));
     662        Value * offset = iBuilder->CreateSub(globalOffset, processed);
     663        Value * valuePtr = iBuilder->getRawInputPointer(inputBufferName, offset);
     664        return iBuilder->CreateLoad(valuePtr);
     665    }
     666
    610667    llvm::Value *LZ4SequentialDecompressionKernel::generateLoadInt64NumberInput(const std::unique_ptr<KernelBuilder> &iBuilder,
    611668                                                                      std::string inputBufferName, llvm::Value *globalOffset) {
     
    625682        );
    626683    }
     684
     685
     686    llvm::Value* LZ4SequentialDecompressionKernel::detectMatch(const std::unique_ptr<KernelBuilder> & b, llvm::Value* start, llvm::Value* end) {
     687        BasicBlock* entryBlock = b->GetInsertBlock();
     688
     689        Constant* SIZE_63 = b->getSize(63);
     690        Constant* SIZE_64 = b->getSize(64);
     691
     692        Value* bufferCapacity = b->getCapacity("matches");
     693        Value* inputBasePtr = b->CreatePointerCast(b->getRawInputPointer("matches", b->getSize(0)), b->getInt64Ty()->getPointerTo());
     694
     695        Value* startRem = b->CreateURem(start, bufferCapacity);
     696        Value* endRem = b->CreateSub(end, b->CreateSub(start, startRem));
     697        Value* startI64BlockIndex = b->CreateUDiv(startRem, SIZE_64);
     698
     699        Value* endI64BlockIndex = b->CreateUDiv(b->CreateAdd(endRem, SIZE_63), SIZE_64);
     700
     701        Value* startPtr = b->CreateGEP(inputBasePtr, startI64BlockIndex);
     702        Value* endPtr = b->CreateGEP(inputBasePtr, endI64BlockIndex);
     703
     704        BasicBlock* conBlock = b->CreateBasicBlock("conBlock");
     705        BasicBlock* bodyBlock = b->CreateBasicBlock("bodyBlock");
     706        BasicBlock* exitBlock = b->CreateBasicBlock("exitBlock");
     707
     708        b->CreateBr(conBlock);
     709
     710        b->SetInsertPoint(conBlock);
     711
     712        PHINode* currentPtr = b->CreatePHI(startPtr->getType(), 2);
     713        PHINode* hasMatch = b->CreatePHI(b->getInt1Ty(), 2);
     714        currentPtr->addIncoming(startPtr, entryBlock);
     715        hasMatch->addIncoming(b->getInt1(false), entryBlock);
     716
     717        Value* shouldContinue = b->CreateAnd(b->CreateNot(hasMatch), b->CreateICmpNE(currentPtr, endPtr));
     718
     719//        b->CallPrintInt("shouldContinue", shouldContinue);
     720
     721        b->CreateCondBr(shouldContinue, bodyBlock, exitBlock);
     722
     723        b->SetInsertPoint(bodyBlock);
     724        Value* currentValue = b->CreateLoad(currentPtr);
     725//        b->CallPrintInt("currentValue", currentValue);
     726        Value* m = b->CreateICmpNE(currentValue, b->getInt64(0));
     727
     728        currentPtr->addIncoming(b->CreateGEP(currentPtr, b->getSize(1)), b->GetInsertBlock());
     729        hasMatch->addIncoming(b->CreateOr(hasMatch, m), b->GetInsertBlock());
     730        b->CreateBr(conBlock);
     731
     732        b->SetInsertPoint(exitBlock);
     733        return hasMatch;
     734    }
     735
    627736}
  • icGREP/icgrep-devel/icgrep/kernels/lz4/decompression/lz4_sequential_decompression_base.h

    r6136 r6150  
    2121class LZ4SequentialDecompressionKernel : public SegmentOrientedKernel {
    2222public:
    23     LZ4SequentialDecompressionKernel(const std::unique_ptr<kernel::KernelBuilder> &b, std::string&& kernelName, unsigned blockSize = 4 * 1024 * 1024);
     23    LZ4SequentialDecompressionKernel(const std::unique_ptr<kernel::KernelBuilder> &b, std::string&& kernelName, unsigned blockSize = 4 * 1024 * 1024, bool conditionalDecompression = false);
    2424protected:
    2525    // ---- Constant
     
    8585    // ---- Basic Function
    8686    llvm::Value *
     87    generateLoadInt8NumberInput(const std::unique_ptr<KernelBuilder> &iBuilder, std::string inputBufferName,
     88                                 llvm::Value *globalOffset);
     89    llvm::Value *
    8790    generateLoadInt64NumberInput(const std::unique_ptr<KernelBuilder> &iBuilder, std::string inputBufferName,
    8891                                 llvm::Value *globalOffset);
     
    114117
    115118    virtual void setProducedOutputItemCount(const std::unique_ptr<KernelBuilder> &b, llvm::Value* produced) = 0;
     119
     120
     121    llvm::Value* detectMatch(const std::unique_ptr<KernelBuilder> & b, llvm::Value* start, llvm::Value* end);
     122private:
     123    bool mConditionalDecompression;
    116124};
    117125}
  • icGREP/icgrep-devel/icgrep/kernels/lz4/decompression/lz4_twist_decompression.cpp

    r6143 r6150  
    142142        Constant* SIZE_ITEMS_PER_BYTE = b->getSize(mItemsPerByte);
    143143        Constant* INT_FW_TWIST_WIDTH = b->getIntN(COPY_FW, mTwistWidth);
    144         Type* INT8_PTR_TY = b->getInt8PtrTy();
     144//        Type* INT8_PTR_TY = b->getInt8PtrTy();
    145145        Type* INT_FW_TY = b->getIntNTy(COPY_FW);
    146146        Type* INT_FW_PTR_TY = INT_FW_TY->getPointerTo();
     
    218218        Constant* SIZE_ITEMS_PER_BYTE = b->getSize(mItemsPerByte);
    219219        Constant* INT_FW_TWIST_WIDTH = b->getIntN(COPY_FW, mTwistWidth);
    220         Type* INT8_PTR_TY = b->getInt8PtrTy();
     220//        Type* INT8_PTR_TY = b->getInt8PtrTy();
    221221        Type* INT_FW_TY = b->getIntNTy(COPY_FW);
    222222        Type* INT_FW_PTR_TY = INT_FW_TY->getPointerTo();
     
    346346        b->CreateMemCpy(actualOutputPtr, b->getScalarField("temporaryOutputPtr"), b->getSize(mBlockSize / mItemsPerByte), 1);
    347347
    348         Value* ptr = b->CreateGEP(b->CreatePointerCast(b->getScalarField("temporaryOutputPtr"), b->getBitBlockType()->getPointerTo()), b->getSize(0x16f));
     348//        Value* ptr = b->CreateGEP(b->CreatePointerCast(b->getScalarField("temporaryOutputPtr"), b->getBitBlockType()->getPointerTo()), b->getSize(0x16f));
    349349
    350350        b->setProducedItemCount("outputTwistStream", produced);
  • icGREP/icgrep-devel/icgrep/kernels/lz4/lz4_block_decoder.h

    r6111 r6150  
    2929
    3030    void appendOutput(const std::unique_ptr<KernelBuilder> & iBuilder, llvm::Value *isCompressed, llvm::Value *blockStart, llvm::Value *blockEnd);
    31 
    3231    void generateStoreNumberOutput(const std::unique_ptr<KernelBuilder> &iBuilder, const std::string &outputBufferName, llvm::Value *offset, llvm::Value *value);
    3332
  • icGREP/icgrep-devel/icgrep/lz4/grep/lz4_grep_base_generator.cpp

    r6148 r6150  
    4242#include <llvm/Support/Debug.h>
    4343#include <kernels/lz4/lz4_block_decoder.h>
     44#include <kernels/lz4/lz4_match_detector.h>
    4445
    4546#include <re/re_seq.h>
    4647#include <kernels/kernel_builder.h>
    4748#include <re/re_alt.h>
     49#include <kernels/lz4/decompression/lz4_bytestream_decompression.h>
    4850
    4951namespace re { class CC; }
     
    6567}
    6668
    67 void LZ4GrepBaseGenerator::generateScanMatchGrepPipeline(re::RE* regex) {
    68     auto & iBuilder = mPxDriver.getBuilder();
    69     this->generateScanMatchMainFunc(iBuilder);
    70 
    71     StreamSetBuffer* compressedByteStream = this->loadByteStream();
    72 
    73     StreamSetBuffer * const uncompressedByteStream = this->byteStreamDecompression(compressedByteStream);
    74     StreamSetBuffer * uncompressedBitStream = this->s2p(uncompressedByteStream);
    75 
    76     StreamSetBuffer * LineBreakStream;
    77     StreamSetBuffer * Matches;
    78     std::tie(LineBreakStream, Matches) = grep(regex, uncompressedBitStream);
    79 
    80     kernel::Kernel * scanMatchK = mPxDriver.addKernelInstance<kernel::ScanMatchKernel>(iBuilder);
    81     scanMatchK->setInitialArguments({match_accumulator});
    82     mPxDriver.makeKernelCall(scanMatchK, {Matches, LineBreakStream, uncompressedByteStream}, {});
    83     mPxDriver.LinkFunction(*scanMatchK, "accumulate_match_wrapper", &accumulate_match_wrapper);
    84     mPxDriver.LinkFunction(*scanMatchK, "finalize_match_wrapper", &finalize_match_wrapper);
    85 
    86     mPxDriver.generatePipelineIR();
    87     mPxDriver.deallocateBuffers();
    88 
    89     iBuilder->CreateRetVoid();
    90 
    91     mPxDriver.finalizeObject();
     69void LZ4GrepBaseGenerator::generateScanMatchGrepPipeline(re::RE* regex, bool enableMultiplexing, bool utf8CC) {
     70    if (enableMultiplexing) {
     71        this->generateMultiplexingScanMatchGrepPipeline(regex, utf8CC);
     72    } else {
     73        this->generateFullyDecompressionScanMatchGrepPipeline(regex);
     74    }
    9275}
    9376
     
    190173        std::vector<re::CC *> mpx_basis = mpx->getMultiplexedCCs();
    191174        auto numOfCharacterClasses = mpx_basis.size();
     175        llvm::errs() << "numOfUnicodeSet:" << UnicodeSets.size() << "\n";
     176
     177        llvm::errs() << "numOfCharacterClasses:" << numOfCharacterClasses << "\n";
    192178        StreamSetBuffer * CharClasses = mGrepDriver->addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(numOfCharacterClasses), baseBufferSize, 1);
    193179
     
    230216        std::vector<re::CC *> mpx_basis = mpx->getMultiplexedCCs();
    231217        auto numOfCharacterClasses = mpx_basis.size();
    232 //        llvm::errs() << "numOfCharacterClasses:" << numOfCharacterClasses << "\n";
     218        llvm::errs() << "numOfUnicodeSet:" << UnicodeSets.size() << "\n";
     219        llvm::errs() << "numOfCharacterClasses:" << numOfCharacterClasses << "\n";
    233220        StreamSetBuffer * CharClasses = mGrepDriver->addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(numOfCharacterClasses), baseBufferSize, 1);
    234221
     
    280267
    281268std::pair<parabix::StreamSetBuffer *, parabix::StreamSetBuffer *> LZ4GrepBaseGenerator::grep(
    282         re::RE *RE, parabix::StreamSetBuffer *uncompressedBasisBits, bool ccMultiplexing) {
     269        re::RE *RE, parabix::StreamSetBuffer *byteStream, parabix::StreamSetBuffer *uncompressedBasisBits, bool ccMultiplexing) {
    283270
    284271    this->initREs(RE);
     
    295282    std::vector<StreamSetBuffer *> MatchResultsBufs(nREs);
    296283
    297     StreamSetBuffer * LineBreakStream = this->linefeedStreamFromUncompressedBits(uncompressedBasisBits);
     284    StreamSetBuffer * LineBreakStream = nullptr;
    298285
    299286
     
    301288
    302289    for(unsigned i = 0; i < nREs; ++i) {
    303         std::vector<std::string> externalStreamNames;
    304         std::vector<StreamSetBuffer *> icgrepInputSets = {uncompressedBasisBits};
     290
    305291
    306292        if (ccMultiplexing) {
     293
     294            if (uncompressedBasisBits == nullptr) {
     295                uncompressedBasisBits = this->s2p(byteStream);
     296            }
     297            this->linefeedStreamFromUncompressedBits(uncompressedBasisBits);
     298            std::vector<std::string> externalStreamNames;
     299            std::vector<StreamSetBuffer *> icgrepInputSets = {uncompressedBasisBits};
     300
    307301            const auto UnicodeSets = re::collectCCs(mRE, &cc::Unicode, std::set<re::Name *>({re::makeZeroWidth("\\b{g}")}));
    308302            StreamSetBuffer * const MatchResults = mGrepDriver->addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(1, 1), baseBufferSize, 1);
     
    321315            MatchResultsBufs[i] = MatchResults;
    322316        } else {
    323             std::set<re::Name *> UnicodeProperties;
    324 
    325 
    326 
    327 
    328             StreamSetBuffer* nonFinalStream = mGrepDriver->addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(1, 1), baseBufferSize, 1);
    329             kernel::Kernel * nonFinalK = mGrepDriver->addKernelInstance<kernel::ICGrepKernel>(idb, u8NonFinalRe, externalStreamNames, std::vector<cc::Alphabet *>(), cc::BitNumbering::BigEndian);
    330             mGrepDriver->makeKernelCall(nonFinalK, icgrepInputSets, {nonFinalStream});
    331             icgrepInputSets.push_back(nonFinalStream);
    332             externalStreamNames.push_back("UTF8_nonfinal");
    333 
    334 
    335 
    336 
    337 
    338             StreamSetBuffer * MatchResults = mGrepDriver->addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(1, 1), baseBufferSize, 1);
    339             kernel::Kernel * icgrepK = mGrepDriver->addKernelInstance<kernel::ICGrepKernel>(idb, mRE, externalStreamNames, std::vector<cc::Alphabet *>(), cc::BitNumbering::BigEndian);
    340             mGrepDriver->makeKernelCall(icgrepK, icgrepInputSets, {MatchResults});
    341             MatchResultsBufs[i] = MatchResults;
     317
     318            bool anyGCB = hasGraphemeClusterBoundary(mRE);
     319            bool isSimple = (mGrepRecordBreak != GrepRecordBreakKind::Unicode) && (!anyGCB);
     320            if (isSimple) {
     321                mRE = toUTF8(mRE);
     322            }
     323            const unsigned ByteCClimit = 6;
     324
     325            if (byteTestsWithinLimit(mRE, ByteCClimit)) {
     326                LineBreakStream = mGrepDriver->addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(1, 1), baseBufferSize);
     327                kernel::Kernel * breakK = mGrepDriver->addKernelInstance<kernel::DirectCharacterClassKernelBuilder>(idb, "breakCC", std::vector<re::CC *>{mBreakCC});
     328                mGrepDriver->makeKernelCall(breakK, {byteStream}, {LineBreakStream});
     329
     330                std::vector<std::string> externalStreamNames;
     331                std::vector<StreamSetBuffer *> icgrepInputSets = {byteStream};
     332                StreamSetBuffer * MatchResults = mGrepDriver->addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(1, 1), baseBufferSize);
     333                kernel::Kernel * icgrepK = mGrepDriver->addKernelInstance<kernel::ByteGrepKernel>(idb, mRE, externalStreamNames);
     334                mGrepDriver->makeKernelCall(icgrepK, icgrepInputSets, {MatchResults});
     335                MatchResultsBufs[i] = MatchResults;
     336
     337            } else {
     338
     339                if (uncompressedBasisBits == nullptr) {
     340                    uncompressedBasisBits = this->s2p(byteStream);
     341                }
     342                this->linefeedStreamFromUncompressedBits(uncompressedBasisBits);
     343                std::vector<std::string> externalStreamNames;
     344                std::vector<StreamSetBuffer *> icgrepInputSets = {uncompressedBasisBits};
     345
     346                std::set<re::Name *> UnicodeProperties;
     347                StreamSetBuffer * MatchResults = mGrepDriver->addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(1, 1), baseBufferSize, 1);
     348                kernel::Kernel * icgrepK = mGrepDriver->addKernelInstance<kernel::ICGrepKernel>(idb, mRE, externalStreamNames, std::vector<cc::Alphabet *>(), cc::BitNumbering::BigEndian);
     349                mGrepDriver->makeKernelCall(icgrepK, icgrepInputSets, {MatchResults});
     350                MatchResultsBufs[i] = MatchResults;
     351            }
    342352        }
    343353    }
     
    375385
    376386
     387void LZ4GrepBaseGenerator::generateFullyDecompressionScanMatchGrepPipeline(re::RE *regex) {
     388    auto & iBuilder = mPxDriver.getBuilder();
     389    this->generateScanMatchMainFunc(iBuilder);
     390
     391    StreamSetBuffer* compressedByteStream = this->loadByteStream();
     392
     393    StreamSetBuffer * const uncompressedByteStream = this->byteStreamDecompression(compressedByteStream);
     394    StreamSetBuffer * uncompressedBitStream = this->s2p(uncompressedByteStream);
     395
     396    StreamSetBuffer * LineBreakStream;
     397    StreamSetBuffer * Matches;
     398    std::tie(LineBreakStream, Matches) = grep(regex, uncompressedByteStream, uncompressedBitStream);
     399
     400    kernel::Kernel * scanMatchK = mPxDriver.addKernelInstance<kernel::ScanMatchKernel>(iBuilder);
     401    scanMatchK->setInitialArguments({match_accumulator});
     402    mPxDriver.makeKernelCall(scanMatchK, {Matches, LineBreakStream, uncompressedByteStream}, {});
     403    mPxDriver.LinkFunction(*scanMatchK, "accumulate_match_wrapper", &accumulate_match_wrapper);
     404    mPxDriver.LinkFunction(*scanMatchK, "finalize_match_wrapper", &finalize_match_wrapper);
     405
     406    mPxDriver.generatePipelineIR();
     407    mPxDriver.deallocateBuffers();
     408
     409    iBuilder->CreateRetVoid();
     410
     411    mPxDriver.finalizeObject();
     412}
     413
     414void LZ4GrepBaseGenerator::generateMultiplexingScanMatchGrepPipeline(re::RE *regex, bool utf8CC) {
     415    auto & iBuilder = mPxDriver.getBuilder();
     416    this->generateScanMatchMainFunc(iBuilder);
     417
     418    StreamSetBuffer *compressedByteStream = nullptr, *compressedBasisBits = nullptr;
     419    std::tie(compressedByteStream, compressedBasisBits) = this->loadByteStreamAndBitStream();
     420
     421    StreamSetBuffer * LineBreakStream;
     422    StreamSetBuffer * Matches;
     423    std::tie(LineBreakStream, Matches) = multiplexingGrep(regex, compressedByteStream, compressedBasisBits, utf8CC);
     424
     425//    Kernel* matchDetector = mPxDriver.addKernelInstance<LZ4MatchDetectorKernel>(iBuilder);
     426//    StreamSetBuffer* hasMatch = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 8),
     427//                                                                  this->getDefaultBufferBlocks(), 1);
     428//    mPxDriver.makeKernelCall(matchDetector, {Matches, LineBreakStream}, {hasMatch});
     429
     430
     431    LZ4BlockInfo blockInfo = this->getBlockInfo(compressedByteStream);
     432
     433    StreamSetBuffer *const uncompressedByteStream =
     434            mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 8),
     435                                              this->getDefaultBufferBlocks(), 1);
     436    Kernel* lz4AioK = mPxDriver.addKernelInstance<LZ4ByteStreamDecompressionKernel>(iBuilder, false, 4 * 1024 * 1024, true);
     437    lz4AioK->setInitialArguments({mFileSize});
     438    mPxDriver.makeKernelCall(
     439            lz4AioK,
     440            {
     441                    compressedByteStream,
     442
     443                    // Block Data
     444                    blockInfo.isCompress,
     445                    blockInfo.blockStart,
     446                    blockInfo.blockEnd,
     447                    Matches
     448            }, {
     449                    uncompressedByteStream
     450            });
     451
     452
     453    kernel::Kernel * scanMatchK = mPxDriver.addKernelInstance<kernel::ScanMatchKernel>(iBuilder);
     454    scanMatchK->setInitialArguments({match_accumulator});
     455    mPxDriver.makeKernelCall(scanMatchK, {Matches, LineBreakStream, uncompressedByteStream}, {});
     456    mPxDriver.LinkFunction(*scanMatchK, "accumulate_match_wrapper", &accumulate_match_wrapper);
     457    mPxDriver.LinkFunction(*scanMatchK, "finalize_match_wrapper", &finalize_match_wrapper);
     458
     459
     460    mPxDriver.generatePipelineIR();
     461
     462    mPxDriver.deallocateBuffers();
     463    iBuilder->CreateRetVoid();
     464    mPxDriver.finalizeObject();
     465}
     466
    377467
    378468void LZ4GrepBaseGenerator::generateMultiplexingCountOnlyGrepPipeline(re::RE *regex, bool utf8CC) {
     
    407497    this->generateCountOnlyMainFunc(iBuilder);
    408498
    409     StreamSetBuffer * const uncompressedBitStream = this->generateUncompressedBitStreams();
     499    StreamSetBuffer * const uncompressedByteStream = this->generateUncompressedByteStream();
     500//    StreamSetBuffer * const uncompressedBitStream = this->generateUncompressedBitStreams();
    410501
    411502    StreamSetBuffer * LineBreakStream;
    412503    StreamSetBuffer * Matches;
    413504
    414     std::tie(LineBreakStream, Matches) = grep(regex, uncompressedBitStream);
     505    std::tie(LineBreakStream, Matches) = grep(regex, uncompressedByteStream, nullptr);
    415506
    416507    kernel::Kernel * matchCountK = mPxDriver.addKernelInstance<kernel::PopcountKernel>(iBuilder);
     
    511602}
    512603
     604
     605
    513606std::vector<parabix::StreamSetBuffer *>
    514607LZ4GrepBaseGenerator::decompressBitStreams(parabix::StreamSetBuffer *compressedByteStream,
  • icGREP/icgrep-devel/icgrep/lz4/grep/lz4_grep_base_generator.h

    r6145 r6150  
    2424    LZ4GrepBaseGenerator();
    2525
    26     void generateScanMatchGrepPipeline(re::RE* regex);
     26    void generateScanMatchGrepPipeline(re::RE* regex, bool enableMultiplexing, bool utf8CC);
    2727    void generateCountOnlyGrepPipeline(re::RE* regex, bool enableMultiplexing, bool utf8CC);
    2828
     
    3636
    3737protected:
     38    virtual parabix::StreamSetBuffer* generateUncompressedByteStream() { // Default implementation: derive the uncompressed byte stream from the bit streams; virtual so generators can override it.
     39        parabix::StreamSetBuffer* bitStreams = this->generateUncompressedBitStreams(); // Pure virtual in this class; supplied by the concrete generator.
     40        return this->p2s(bitStreams); // Convert the bit streams to a byte stream with p2s() and return the resulting buffer.
     41    }
    3842    virtual parabix::StreamSetBuffer* generateUncompressedBitStreams() = 0;
    3943    virtual parabix::StreamSetBuffer* decompressBitStream(parabix::StreamSetBuffer* compressedByteStream, parabix::StreamSetBuffer* compressedBitStream) = 0;
     
    7074
    7175
     76    void generateFullyDecompressionScanMatchGrepPipeline(re::RE *regex);
     77    void generateMultiplexingScanMatchGrepPipeline(re::RE *regex, bool utf8CC);
     78
     79
    7280    void generateFullyDecompressionCountOnlyGrepPipeline(re::RE *regex);
    7381    void generateMultiplexingCountOnlyGrepPipeline(re::RE *regex, bool utf8CC);
    7482
    7583
    76     std::pair<parabix::StreamSetBuffer *, parabix::StreamSetBuffer *> grep(re::RE *RE,
     84    std::pair<parabix::StreamSetBuffer *, parabix::StreamSetBuffer *> grep(re::RE *RE, parabix::StreamSetBuffer *byteStream,
    7785                                                                           parabix::StreamSetBuffer *uncompressedBasisBits, bool ccMultiplexing = false);
    7886    std::pair<parabix::StreamSetBuffer *, parabix::StreamSetBuffer *> multiplexingGrep(
  • icGREP/icgrep-devel/icgrep/lz4/grep/lz4_grep_bytestream_generator.cpp

    r6145 r6150  
    1414using namespace parabix;
    1515
    16 StreamSetBuffer *LZ4GrepByteStreamGenerator::generateUncompressedBitStreams() {
     16
     17parabix::StreamSetBuffer* LZ4GrepByteStreamGenerator::generateUncompressedByteStream() { // Override: returns the buffer produced by byteStreamDecompression() directly, with no s2p()/p2s() conversion in this function.
    1718    StreamSetBuffer* compressedByteStream = this->loadByteStream(); // Obtain the compressed input byte stream.
    1819    parabix::StreamSetBuffer * uncompressedByteStream = this->byteStreamDecompression(compressedByteStream); // Decompress it; the result is the buffer handed back to the caller.
     20    return uncompressedByteStream;
     21}
     22StreamSetBuffer *LZ4GrepByteStreamGenerator::generateUncompressedBitStreams() { // Builds the uncompressed bit streams on top of generateUncompressedByteStream().
     23    parabix::StreamSetBuffer * uncompressedByteStream = this->generateUncompressedByteStream(); // Reuse the byte-stream path (load + byteStreamDecompression) instead of repeating it here.
    1924    return this->s2p(uncompressedByteStream); // Convert the uncompressed byte stream to bit streams via s2p() and return that buffer.
    2025}
  • icGREP/icgrep-devel/icgrep/lz4/grep/lz4_grep_bytestream_generator.h

    r6145 r6150  
    77class LZ4GrepByteStreamGenerator: public LZ4GrepBaseGenerator {
    88protected:
     9    virtual parabix::StreamSetBuffer* generateUncompressedByteStream() override;
    910    virtual parabix::StreamSetBuffer* generateUncompressedBitStreams() override;
    1011    virtual parabix::StreamSetBuffer* decompressBitStream(parabix::StreamSetBuffer* compressedByteStream, parabix::StreamSetBuffer* compressedBitStream) override;
  • icGREP/icgrep-devel/icgrep/lz4/lz4_base_generator.cpp

    r6143 r6150  
    66#include <kernels/source_kernel.h>
    77#include <kernels/s2p_kernel.h>
     8#include <kernels/p2s_kernel.h>
    89#include <kernels/swizzle.h>
    910#include <kernels/lz4/twist_kernel.h>
     
    1920using namespace kernel;
    2021
    21 LZ4BaseGenerator::LZ4BaseGenerator():mPxDriver("lz4"), mLz4BlockSize(4 * 1024 * 1024) {
     22LZ4BaseGenerator::LZ4BaseGenerator():mPxDriver("lz4"), mLz4BlockSize(4 * 1024 * 1024), mInitBlockInfo(false) {
    2223
    2324}
     
    4041    return basisBits;
    4142}
     43parabix::StreamSetBuffer* LZ4BaseGenerator::p2s(parabix::StreamSetBuffer* bitStream) { // Adds a P2SKernel call that reads `bitStream` and writes a newly added byte-stream buffer; returns that buffer.
     44    auto & b = mPxDriver.getBuilder(); // Builder used for both the buffer type and the kernel instance below.
     45    StreamSetBuffer* byteStream = mPxDriver.addBuffer<StaticBuffer>(b, b->getStreamSetTy(1, 8), // Output buffer of stream-set type getStreamSetTy(1, 8) -- presumably one stream of 8-bit items; confirm against the builder API.
     46                                                                   this->getDefaultBufferBlocks()); // Sized with the generator's default buffer block count.
     47    Kernel * p2sk = mPxDriver.addKernelInstance<P2SKernel>(b, cc::BitNumbering::BigEndian); // The kernel is constructed with BigEndian bit numbering.
     48    mPxDriver.makeKernelCall(p2sk, {bitStream}, {byteStream}); // Wire the kernel: input is the given bit stream set, output is the new byte stream buffer.
     49    return byteStream;
     50}
    4251
    4352std::pair<parabix::StreamSetBuffer*, parabix::StreamSetBuffer*>  LZ4BaseGenerator::loadByteStreamAndBitStream() {
     
    4756}
    4857
    49 
    5058LZ4BlockInfo LZ4BaseGenerator::getBlockInfo(StreamSetBuffer* compressedByteStream) {
     59    if (mInitBlockInfo) {
     60        return mBlockInfo;
     61    }
     62
    5163    auto & b = mPxDriver.getBuilder();
    5264    LZ4BlockInfo blockInfo;
     
    6072    mPxDriver.makeKernelCall(blockDecoderK, {compressedByteStream}, {blockInfo.isCompress, blockInfo.blockStart, blockInfo.blockEnd});
    6173
     74    mInitBlockInfo = true;
     75    mBlockInfo = blockInfo;
    6276    return blockInfo;
    6377}
  • icGREP/icgrep-devel/icgrep/lz4/lz4_base_generator.h

    r6143 r6150  
    3434    // Stream Conversion
    3535    parabix::StreamSetBuffer* s2p(parabix::StreamSetBuffer* byteStream);
     36    parabix::StreamSetBuffer* p2s(parabix::StreamSetBuffer* bitStream);
    3637
    3738
     
    7374
    7475    unsigned mLz4BlockSize;
     76
     77    bool mInitBlockInfo;
     78    LZ4BlockInfo mBlockInfo;
    7579};
    7680
  • icGREP/icgrep-devel/icgrep/lz4_grep.cpp

    r6143 r6150  
    9595        llvm::outs() << countResult << "\n";
    9696    } else {
    97         g->generateScanMatchGrepPipeline(re_ast);
     97        g->generateScanMatchGrepPipeline(re_ast, enableMultiplexing, utf8CC);
    9898        g->invokeScanMatchGrep(fileBuffer, lz4Frame.getBlocksStart(), lz4Frame.getBlocksStart() + lz4Frame.getBlocksLength(), lz4Frame.hasBlockChecksum());
    9999    }
Note: See TracChangeset for help on using the changeset viewer.