Changeset 6153


Ignore:
Timestamp:
Aug 29, 2018, 3:05:38 AM (4 weeks ago)
Author:
xwa163
Message:

lz4_grep: 1. fix typo 2. analyse the requirement of u8 non final

Location:
icGREP/icgrep-devel/icgrep
Files:
3 edited

Legend:

Unmodified
Added
Removed
  • icGREP/icgrep-devel/icgrep/lz4/grep/lz4_grep_base_generator.cpp

    r6152 r6153  
    160160        re::RE* targetRe = mRE;
    161161
    162         bool allCcByteLength = re::isAllCcByteLength(mRE);
     162        bool requireNonFinal = re::isRequireNonFinal(mRE);
     163        std::vector<re::CC*> OriginalUnicodeSets = re::collectCCs(seq, &cc::Unicode, std::set<re::Name *>({re::makeZeroWidth("\\b{g}")}));;
    163164
    164165        linefeedCC = re::makeCC(0x0A);
     
    166167        seq->push_back(targetRe);
    167168        seq->push_back(std::move(linefeedCC));
    168         seq->push_back(u8FinalRe);
     169        if (requireNonFinal) {
     170            seq->push_back(u8FinalRe);
     171        }
     172
    169173
    170174        std::vector<re::CC*> UnicodeSets = re::collectCCs(seq, &cc::Unicode, std::set<re::Name *>({re::makeZeroWidth("\\b{g}")}));;
     
    184188        mGrepDriver->makeKernelCall(ccK, {compressedBitStream}, {CharClasses});
    185189
    186         if (allCcByteLength) {
     190        if (!requireNonFinal) {
    187191            // We do not need to decompress the U8 NonFinal stream if every character class in the target regular expression is byte length
    188192            uncompressedCharClasses = this->decompressBitStream(compressedByteStream, CharClasses);
     
    210214            Kernel* notK = mGrepDriver->addKernelInstance<LZ4NotKernel>(idb);
    211215            mGrepDriver->makeKernelCall(notK, {u8FinalStream}, {u8NoFinalStream});
    212 
    213216        }
    214217    } else {
     
    301304
    302305    for(unsigned i = 0; i < nREs; ++i) {
    303 
    304306
    305307        if (ccMultiplexing) {
     
    353355                    uncompressedBasisBits = this->s2p(byteStream);
    354356                }
    355                 this->linefeedStreamFromUncompressedBits(uncompressedBasisBits);
     357                LineBreakStream = this->linefeedStreamFromUncompressedBits(uncompressedBasisBits);
    356358                std::vector<std::string> externalStreamNames;
    357359                std::vector<StreamSetBuffer *> icgrepInputSets = {uncompressedBasisBits};
  • icGREP/icgrep-devel/icgrep/re/re_analysis.cpp

    r6133 r6153  
    103103    }
    104104    return makeCC(); // otherwise = Start, End, Assertion
     105}
     106
     107bool isRequireNonFinal(const RE * re, bool checkByteLength) {
     108    if (checkByteLength) {
     109        bool allCcByteLength = isAllCcByteLength(re);
     110        if (allCcByteLength) {
     111            return false;
     112        }
     113    }
     114
     115    if (const Alt * alt = dyn_cast<Alt>(re)) {
     116        if (alt->size() == 1) {
     117            RE* first = (*alt)[0];
     118            return isRequireNonFinal(first, false);
     119        }
     120    } else if (const Seq * seq = dyn_cast<Seq>(re)) {
     121        if (seq->size() == 0) {
     122            return false;
     123        } else if (seq->size() == 1) {
     124            return isRequireNonFinal((*seq)[0], false);
     125        }
     126    } else if (const Name * n = dyn_cast<Name>(re)) {
     127        return false;
     128    } else if (const CC * cc = dyn_cast<CC>(re)) {
     129        return false;
     130    }
     131    return true;
    105132}
    106133
  • icGREP/icgrep-devel/icgrep/re/re_analysis.h

    r6130 r6153  
    1919
    2020bool isAllCcByteLength(const RE * re);
     21bool isRequireNonFinal(const RE * re, bool checkByteLength = true);
    2122   
    2223bool isUnicodeUnitLength(const RE * re);
Note: See TracChangeset for help on using the changeset viewer.