Changeset 6130


Ignore:
Timestamp:
Jul 10, 2018, 1:56:50 AM (6 days ago)
Author:
xwa163
Message:

Decompress u8NoFinalStream in lzparabix_grep

Location:
icGREP/icgrep-devel/icgrep
Files:
4 edited

Legend:

Unmodified
Added
Removed
  • icGREP/icgrep-devel/icgrep/grep/grep_engine.cpp

    r6127 r6130  
    414414    //                kernel::Kernel * ccK = mGrepDriver->addKernelInstance<kernel::CharClassesKernel>(idb, std::move(mpx_basis), true);
    415415    //                mGrepDriver->makeKernelCall(ccK, {ByteStream}, {CharClasses});
    416                     kernel::Kernel * icgrepK = mGrepDriver->addKernelInstance<kernel::ICGrepKernel>(idb, mREs[i], externalStreamNames, std::vector<cc::Alphabet *>{mpx.get()}, cc::BitNumbering::LittleEndian, true);
     416                    kernel::Kernel * icgrepK = mGrepDriver->addKernelInstance<kernel::ICGrepKernel>(idb, mREs[i], externalStreamNames, std::vector<cc::Alphabet *>{mpx.get()}, cc::BitNumbering::LittleEndian);
    417417                    icgrepInputSets.push_back(CharClasses);
    418418                    mGrepDriver->makeKernelCall(icgrepK, icgrepInputSets, {MatchResults});
  • icGREP/icgrep-devel/icgrep/lzparabix/LZParabixGrepGenerator.cpp

    r6128 r6130  
    4848#include <llvm/Support/Debug.h>
    4949#include <kernels/fake_stream_generating_kernel.h>
     50#include <re/re_alt.h>
    5051
    5152namespace re { class CC; }
     
    187188    StreamSetBuffer * const LiteralBitStream = this->extractLiteralBitStream(idb);
    188189
     190    bool allCcByteLength = re::isAllCcByteLength(mREs[0]);
     191
    189192    mpx = make_unique<cc::MultiplexedAlphabet>("mpx", UnicodeSets);
    190193    mREs[0] = transformCCs(mpx.get(), mREs[0]);
     
    198201    mGrepDriver->makeKernelCall(ccK, {LiteralBitStream}, {CharClasses});
    199202
    200 //    StreamSetBuffer * CompressedLineFeedStream = mPxDriver.addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(1, 1), baseBufferSize, 1);
    201 //    kernel::Kernel * linefeedK = mPxDriver.addKernelInstance<kernel::LineFeedKernelBuilder>(idb, Binding{idb->getStreamSetTy(8), "basis", FixedRate(), Principal()}, cc::BitNumbering::BigEndian);
    202 //    mPxDriver.makeKernelCall(linefeedK, {LiteralBitStream}, {CompressedLineFeedStream});
    203 
    204 //    auto ret = this->generateBitStreamDecompression(idb, {CharClasses, CompressedLineFeedStream});
    205     auto ret = this->generateBitStreamDecompression(idb, {CharClasses});
    206 
    207     StreamSetBuffer * decompressedCharClasses = ret[0];
    208 //    StreamSetBuffer * LineBreakStream = ret[1];
    209 
    210 
    211     StreamSetBuffer * fakeMatchCopiedBits = mPxDriver.addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(8), this->getInputBufferBlocks(idb), 1);
    212     Kernel* fakeStreamGeneratorK = mPxDriver.addKernelInstance<FakeStreamGeneratingKernel>(idb, numOfCharacterClasses, 8);
    213     mPxDriver.makeKernelCall(fakeStreamGeneratorK, {decompressedCharClasses}, {fakeMatchCopiedBits});
    214 
    215     kernel::Kernel * icgrepK = mGrepDriver->addKernelInstance<kernel::ICGrepKernel>(idb, mREs[0], externalStreamNames, std::vector<cc::Alphabet *>{mpx.get()}, cc::BitNumbering::BigEndian, true);
    216     mGrepDriver->makeKernelCall(icgrepK, {fakeMatchCopiedBits, decompressedCharClasses}, {MatchResults});
    217     MatchResultsBufs[0] = MatchResults;
    218 
    219     StreamSetBuffer * newLineBreak = mPxDriver.addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(1, 1), this->getInputBufferBlocks(idb));
    220     kernel::Kernel * lineFeedGrepK = mGrepDriver->addKernelInstance<kernel::ICGrepKernel>(idb, transformCCs(mpx.get(), linefeedCC), externalStreamNames, std::vector<cc::Alphabet *>{mpx.get()}, cc::BitNumbering::BigEndian, true);
    221     mGrepDriver->makeKernelCall(lineFeedGrepK, {fakeMatchCopiedBits, decompressedCharClasses}, {newLineBreak});
    222 
     203
     204    StreamSetBuffer * newLineBreak = nullptr;
     205    if (allCcByteLength) {
     206        auto ret = this->generateBitStreamDecompression(idb, {CharClasses});
     207        StreamSetBuffer * decompressedCharClasses = ret[0];
     208        StreamSetBuffer * fakeMatchCopiedBits = mPxDriver.addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(8), this->getInputBufferBlocks(idb), 1);
     209        Kernel* fakeStreamGeneratorK = mPxDriver.addKernelInstance<FakeStreamGeneratingKernel>(idb, numOfCharacterClasses, 8);
     210        mPxDriver.makeKernelCall(fakeStreamGeneratorK, {decompressedCharClasses}, {fakeMatchCopiedBits});
     211
     212        newLineBreak = mPxDriver.addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(1, 1), this->getInputBufferBlocks(idb));
     213        kernel::Kernel * lineFeedGrepK = mGrepDriver->addKernelInstance<kernel::ICGrepKernel>(idb, transformCCs(mpx.get(), linefeedCC), externalStreamNames, std::vector<cc::Alphabet *>{mpx.get()}, cc::BitNumbering::BigEndian, true);
     214        mGrepDriver->makeKernelCall(lineFeedGrepK, {fakeMatchCopiedBits, decompressedCharClasses}, {newLineBreak});
     215
     216        kernel::Kernel * icgrepK = mGrepDriver->addKernelInstance<kernel::ICGrepKernel>(idb, mREs[0], externalStreamNames, std::vector<cc::Alphabet *>{mpx.get()}, cc::BitNumbering::BigEndian, true);
     217        mGrepDriver->makeKernelCall(icgrepK, {fakeMatchCopiedBits, decompressedCharClasses}, {MatchResults});
     218        MatchResultsBufs[0] = MatchResults;
     219    } else {
     220        re::RE* nonFinalName = re::makeAlt({re::makeByte(0xC2, 0xF4),
     221                                            re::makeSeq({re::makeByte(0xE0, 0xF4), re::makeByte(0x80, 0xBF)}),
     222                                            re::makeSeq({re::makeByte(0xF0, 0xF4), re::makeByte(0x80, 0xBF), re::makeByte(0x80, 0xBF)})});
     223        StreamSetBuffer * compressedU8NoFinalStream = mPxDriver.addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(1, 1), this->getInputBufferBlocks(idb));
     224        kernel::Kernel * u8NoFinalK = mGrepDriver->addKernelInstance<kernel::ICGrepKernel>(idb, nonFinalName, externalStreamNames, std::vector<cc::Alphabet *>(), cc::BitNumbering::BigEndian);
     225        mGrepDriver->makeKernelCall(u8NoFinalK, {LiteralBitStream}, {compressedU8NoFinalStream});
     226
     227        auto ret = this->generateBitStreamDecompression(idb, {CharClasses, compressedU8NoFinalStream});
     228
     229        StreamSetBuffer * decompressedCharClasses = ret[0];
     230        StreamSetBuffer * u8NoFinalStream = ret[1];
     231
     232        StreamSetBuffer * fakeMatchCopiedBits = mPxDriver.addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(8), this->getInputBufferBlocks(idb), 1);
     233        Kernel* fakeStreamGeneratorK = mPxDriver.addKernelInstance<FakeStreamGeneratingKernel>(idb, numOfCharacterClasses, 8);
     234        mPxDriver.makeKernelCall(fakeStreamGeneratorK, {decompressedCharClasses}, {fakeMatchCopiedBits});
     235
     236        newLineBreak = mPxDriver.addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(1, 1), this->getInputBufferBlocks(idb));
     237        kernel::Kernel * lineFeedGrepK = mGrepDriver->addKernelInstance<kernel::ICGrepKernel>(idb, transformCCs(mpx.get(), linefeedCC), externalStreamNames, std::vector<cc::Alphabet *>{mpx.get()}, cc::BitNumbering::BigEndian, true);
     238        mGrepDriver->makeKernelCall(lineFeedGrepK, {fakeMatchCopiedBits, decompressedCharClasses}, {newLineBreak});
     239
     240        externalStreamNames.push_back("UTF8_nonfinal");
     241
     242        kernel::Kernel * icgrepK = mGrepDriver->addKernelInstance<kernel::ICGrepKernel>(idb, mREs[0], externalStreamNames, std::vector<cc::Alphabet *>{mpx.get()}, cc::BitNumbering::BigEndian);
     243        mGrepDriver->makeKernelCall(icgrepK, {fakeMatchCopiedBits, u8NoFinalStream, decompressedCharClasses}, {MatchResults});
     244        MatchResultsBufs[0] = MatchResults;
     245    }
    223246
    224247    StreamSetBuffer * MergedResults = MatchResultsBufs[0];
  • icGREP/icgrep-devel/icgrep/re/re_analysis.cpp

    r5951 r6130  
    105105}
    106106
    107 
     107bool isAllCcByteLength(const RE * re) {
     108    if (const Alt * alt = dyn_cast<Alt>(re)) {
     109        for (const RE * re : *alt) {
     110            if (!isAllCcByteLength(re)) {
     111                return false;
     112            }
     113        }
     114        return true;
     115    } else if (const Seq * seq = dyn_cast<Seq>(re)) {
     116        for (const RE * e : *seq) {
     117            if (!isAllCcByteLength(e)) return false;
     118        }
     119        return true;
     120    } else if (const Rep * rep = dyn_cast<Rep>(re)) {
     121        return isAllCcByteLength(rep->getRE());
     122    }  else if (const Name * n = dyn_cast<Name>(re)) {
     123        if (n->getType() == Name::Type::ZeroWidth) {
     124            return false;
     125        }
     126        return isAllCcByteLength(n->getDefinition());
     127    } else {
     128        return isByteLength(re);
     129    }
     130}
    108131
    109132bool isByteLength(const RE * re) {
  • icGREP/icgrep-devel/icgrep/re/re_analysis.h

    r5951 r6130  
    1717
    1818bool isByteLength(const RE * re);
     19
     20bool isAllCcByteLength(const RE * re);
    1921   
    2022bool isUnicodeUnitLength(const RE * re);
Note: See TracChangeset for help on using the changeset viewer.