Ignore:
Timestamp:
Mar 30, 2018, 10:36:24 AM (19 months ago)
Author:
cameron
Message:

Direct CC builder work

Location:
icGREP/icgrep-devel/icgrep
Files:
9 edited

Legend:

Unmodified
Added
Removed
  • icGREP/icgrep-devel/icgrep/UCD/unicode_set.cpp

    r5909 r5935  
    321321    }
    322322    return UnicodeSet(runs, mRunLength, mRunLength, quads, mQuadLength, mQuadLength);
     323}
     324
     325/** ------------------------------------------------------------------------------------------------------------- *
     326 * @brief isolates
     327 ** ------------------------------------------------------------------------------------------------------------- */
     328UnicodeSet UnicodeSet::isolates() const noexcept {
     329    UnicodeSet theIsolates;
     330    for (auto range : *this) {
     331        if (range.first == range.second) {
     332            theIsolates.insert(range.first);
     333        }
     334    }
     335    return theIsolates;
    323336}
    324337
  • icGREP/icgrep-devel/icgrep/UCD/unicode_set.h

    r5759 r5935  
    137137    UnicodeSet operator-(const UnicodeSet & other) const noexcept;
    138138    UnicodeSet operator^(const UnicodeSet & other) const noexcept;
     139   
     140    // The subset of a UnicodeSet consisting of the isolated codepoints only, i.e.,
     141    // those codepoints cp such that neither cp-1 nor cp+1 is a member of the set.
     142    UnicodeSet isolates () const noexcept;
    139143
    140144    UnicodeSet & operator=(const UnicodeSet & other) noexcept;
  • icGREP/icgrep-devel/icgrep/cc/cc_compiler.cpp

    r5872 r5935  
    240240    unsigned topBit = 1 << (codeUnitWidth - 1);
    241241    unsigned maxCodeVal = (topBit - 1) | topBit;
     242    //
     243    // Optimization if there are isolated codepoints that are not in the set.
     244    UCD::UnicodeSet negatedIsolates = (~(*cc)).isolates();
     245    UCD::UnicodeSet withNegatedIsolates = (*cc + negatedIsolates);
    242246    PabloAST * ccStrm = pb.createZeroes();
    243     for (const auto & interval : *cc) {
     247    for (const auto & interval : withNegatedIsolates) {
    244248        unsigned lo = re::lo_codepoint(interval);
    245249        unsigned hi = re::hi_codepoint(interval);
     
    265269        }
    266270    }
     271    if (!negatedIsolates.empty()) {
     272        PabloAST * toExclude = pb.createZeroes();
     273        for (const auto & interval : negatedIsolates) {
     274            PabloAST * testVal = pb.createRepeat(codeUnitWidth, re::lo_codepoint(interval));
     275            toExclude = pb.createOr(toExclude, pb.createEquals(codeUnitStream, testVal));
     276        }
     277        ccStrm = pb.createAnd(ccStrm, pb.createNot(toExclude));
     278    }
    267279    return ccStrm;
    268280}
  • icGREP/icgrep-devel/icgrep/editd/editd.cpp

    r5915 r5935  
    380380                              re::makeCC(re::makeCC(0x43), re::makeCC(0x63)),
    381381                              re::makeCC(re::makeCC(0x54), re::makeCC(0x74)),
    382                               re::makeCC(re::makeCC(0x47), re::makeCC(0x67))}, 1);
     382                              re::makeCC(re::makeCC(0x47), re::makeCC(0x67))});
    383383    pxDriver.makeKernelCall(ccck, {ByteStream}, {ChStream});
    384384
  • icGREP/icgrep-devel/icgrep/grep/grep_engine.cpp

    r5934 r5935  
    277277                mREs[0] = re::replaceCC(mREs[0], cc, ccName);
    278278                std::string ccNameStr = ccName->getFullName();
    279                 errs () << "Replacing: " << ccNameStr << "\n";
    280279                StreamSetBuffer * ccStream = mGrepDriver->addBuffer<CircularBuffer>(idb, idb->getStreamSetTy(1, 1), baseBufferSize);
    281                 kernel::Kernel * ccK = mGrepDriver->addKernelInstance<kernel::DirectCharacterClassKernelBuilder>(idb, ccNameStr, std::vector<re::CC *>{cc}, 1);
     280                kernel::Kernel * ccK = mGrepDriver->addKernelInstance<kernel::DirectCharacterClassKernelBuilder>(idb, ccNameStr, std::vector<re::CC *>{cc});
    282281                mGrepDriver->makeKernelCall(ccK, {ByteStream}, {ccStream});
    283282                externalStreamNames.push_back(ccNameStr);
     
    289288        mGrepDriver->makeKernelCall(icgrepK, icgrepInputSets, {MatchResults});
    290289        MatchResultsBufs[0] = MatchResults;
    291         kernel::Kernel * breakK = mGrepDriver->addKernelInstance<kernel::DirectCharacterClassKernelBuilder>(idb, "breakCC", std::vector<re::CC *>{mBreakCC}, 1);
     290        kernel::Kernel * breakK = mGrepDriver->addKernelInstance<kernel::DirectCharacterClassKernelBuilder>(idb, "breakCC", std::vector<re::CC *>{mBreakCC});
    292291        mGrepDriver->makeKernelCall(breakK, {ByteStream}, {LineBreakStream});
    293292    } else if (isSimple && hasTriCCwithinLimit(mREs[0], ByteCClimit, prefixRE, suffixRE)) {
     
    296295        mGrepDriver->makeKernelCall(icgrepK, {ByteStream}, {MatchResults});
    297296        MatchResultsBufs[0] = MatchResults;
    298         kernel::Kernel * breakK = mGrepDriver->addKernelInstance<kernel::DirectCharacterClassKernelBuilder>(idb, "breakCC", std::vector<re::CC *>{mBreakCC}, 1);
     297        kernel::Kernel * breakK = mGrepDriver->addKernelInstance<kernel::DirectCharacterClassKernelBuilder>(idb, "breakCC", std::vector<re::CC *>{mBreakCC});
    299298        mGrepDriver->makeKernelCall(breakK, {ByteStream}, {LineBreakStream});
    300299    } else {
  • icGREP/icgrep-devel/icgrep/kernels/cc_kernel.cpp

    r5902 r5935  
    11/*
    2  *  Copyright (c) 2016 International Characters.
     2 *  Copyright (c) 2018 International Characters.
    33 *  This software is licensed to the public under the Open Software License 3.0.
    44 */
     
    1717
    1818DirectCharacterClassKernelBuilder::DirectCharacterClassKernelBuilder(
    19         const std::unique_ptr<kernel::KernelBuilder> & b, std::string ccSetName, std::vector<re::CC *> charClasses, unsigned codeUnitSize)
    20 : BlockOrientedKernel(std::move(ccSetName),
    21               {Binding{b->getStreamSetTy(1, 8 * codeUnitSize), "codeUnitStream", FixedRate(), Principal()}},
    22               {Binding{b->getStreamSetTy(charClasses.size(), 1), "ccStream"}},
    23               {}, {}, {})
    24 , mCharClasses(charClasses)
    25 , mCodeUnitSize(codeUnitSize) {
    26     if (codeUnitSize > 4) errs() << "codeUnitsize of " << codeUnitSize << " too large!\n";
     19        const std::unique_ptr<kernel::KernelBuilder> & b, std::string ccSetName, std::vector<re::CC *> charClasses)
     20: PabloKernel(b, ccSetName +"_direct",
     21              {Binding{b->getStreamSetTy(1, 8), "byteStream"}},
     22              {Binding{b->getStreamSetTy(charClasses.size(), 1), "ccStream"}})
     23, mCharClasses(charClasses) {
    2724}
    2825
    29 void DirectCharacterClassKernelBuilder::generateDoBlockMethod(const std::unique_ptr<KernelBuilder> & iBuilder) {
    30     unsigned packCount = 8 * mCodeUnitSize; 
    31     unsigned codeUnitWidth = 8 * mCodeUnitSize;
    32     unsigned topBit = 1 << codeUnitWidth;
    33     unsigned maxCodeVal = (topBit - 1) | topBit;
    34     Value * codeUnitPack[packCount];
    35     for (unsigned i = 0; i < packCount; i++) {
    36         codeUnitPack[i] = iBuilder->loadInputStreamPack("codeUnitStream", iBuilder->getInt32(0), iBuilder->getInt32(i));
    37     }
    38     for (unsigned j = 0; j < mCharClasses.size();  j++) {
    39         Value * theCCstream = iBuilder->allZeroes();
    40         for (const auto & interval : *mCharClasses[j]) {
    41             Value * strmPack[packCount];
    42             unsigned lo = re::lo_codepoint(interval);
    43             unsigned hi = re::hi_codepoint(interval);
    44             if (lo == hi) {
    45                 Value * cp = ConstantInt::get(iBuilder->getIntNTy(codeUnitWidth), lo);
    46                 Value * cp_splat = iBuilder->simd_fill(codeUnitWidth, cp);
    47                 for (unsigned k = 0; k < packCount; k++) {
    48                     strmPack[k] = iBuilder->simd_eq(codeUnitWidth, codeUnitPack[k], cp_splat);
    49                 }
    50             } else if (lo == 0) {
    51                 if (hi == maxCodeVal) {
    52                     for (unsigned k = 0; k < packCount; k++) {
    53                         strmPack[k] = iBuilder->allOnes();
    54                     }
    55                 } else {
    56                     Value * cp = ConstantInt::get(iBuilder->getIntNTy(codeUnitWidth), hi + 1);
    57                     Value * cp_splat = iBuilder->simd_fill(codeUnitWidth, cp);
    58                     for (unsigned k = 0; k < packCount; k++) {
    59                         strmPack[k] = iBuilder->simd_ult(codeUnitWidth, codeUnitPack[k], cp_splat);
    60                     }
    61                 }
    62             } else if (hi == maxCodeVal) {
    63                 Value * cp = ConstantInt::get(iBuilder->getIntNTy(codeUnitWidth), lo - 1);
    64                 Value * cp_splat = iBuilder->simd_fill(codeUnitWidth, cp);
    65                 for (unsigned k = 0; k < packCount; k++) {
    66                     strmPack[k] = iBuilder->simd_ugt(codeUnitWidth, codeUnitPack[k], cp_splat);
    67                 }
    68             } else {
    69                 Value * v1 = ConstantInt::get(iBuilder->getIntNTy(codeUnitWidth), lo-1);
    70                 Value * lo_splat = iBuilder->simd_fill(codeUnitWidth, v1);
    71                 Value * v2 = ConstantInt::get(iBuilder->getIntNTy(codeUnitWidth), hi+1);
    72                 Value * hi_splat = iBuilder->simd_fill(codeUnitWidth, v2);
    73                 for (unsigned k = 0; k < packCount; k++) {
    74                     Value * lo_test = iBuilder->simd_ugt(codeUnitWidth, codeUnitPack[k], lo_splat);
    75                     Value * hi_test = iBuilder->simd_ult(codeUnitWidth, codeUnitPack[k], hi_splat);
    76                     strmPack[k] = iBuilder->simd_and(lo_test, hi_test);
    77                 }
    78             }
    79             unsigned packFields = iBuilder->getBitBlockWidth()/packCount;
    80             Value * pack = iBuilder->allZeroes();
    81             for (unsigned k = 0; k < packCount; k++) {
    82                 pack = iBuilder->mvmd_insert(packFields, pack, iBuilder->CreateTrunc(iBuilder->hsimd_signmask(codeUnitWidth, strmPack[k]), iBuilder->getIntNTy(packFields)), k);
    83             }
    84 
    85             theCCstream = iBuilder->simd_or(theCCstream, pack);
    86         }
    87         iBuilder->storeOutputStreamBlock("ccStream", iBuilder->getInt32(j), theCCstream);
     26void DirectCharacterClassKernelBuilder::generatePabloMethod() {
     27    PabloBuilder pb(getEntryScope());
     28    cc::Direct_CC_Compiler ccc(getEntryScope(), getInputStreamSet("byteStream")[0]);
     29    Var * outputVar = getOutputStreamVar("ccStream");
     30    for (unsigned i = 0; i < mCharClasses.size(); ++i) {
     31        pb.createAssign(pb.createExtract(outputVar, i), ccc.compileCC(mCharClasses[i]));
    8832    }
    8933}
     
    9135
    9236ParabixCharacterClassKernelBuilder::ParabixCharacterClassKernelBuilder (
    93         const std::unique_ptr<kernel::KernelBuilder> & b, std::string ccSetName, const std::vector<CC *> & charClasses, unsigned codeUnitSize)
     37        const std::unique_ptr<kernel::KernelBuilder> & b, std::string ccSetName, const std::vector<CC *> & charClasses, unsigned codeUnitWidth)
    9438: PabloKernel(b, ccSetName +"_kernel",
    9539// stream inputs
    96 {Binding{b->getStreamSetTy(codeUnitSize), "basis"}}
     40{Binding{b->getStreamSetTy(codeUnitWidth), "basis"}}
    9741// stream outputs
    9842, {Binding(b->getStreamSetTy((unsigned int)charClasses.size()), "outputStream")}
  • icGREP/icgrep-devel/icgrep/kernels/cc_kernel.h

    r5820 r5935  
    1313namespace kernel {
    1414
    15 class DirectCharacterClassKernelBuilder final : public BlockOrientedKernel {
     15class DirectCharacterClassKernelBuilder final : public pablo::PabloKernel {
    1616public:   
    17     DirectCharacterClassKernelBuilder(const std::unique_ptr<KernelBuilder> & b, std::string ccSetName, std::vector<re::CC *> charClasses, unsigned codeUnitSize);
    18     void generateDoBlockMethod(const std::unique_ptr<KernelBuilder> & iBuilder) override;
    19     bool isCachable() const override { return true;}
    20     bool hasSignature() const override { return false; }
     17    DirectCharacterClassKernelBuilder(const std::unique_ptr<KernelBuilder> & b, std::string ccSetName, std::vector<re::CC *> charClasses);
     18protected:
     19    void generatePabloMethod() override;
    2120private:
    2221    const std::vector<re::CC *> mCharClasses;
    23     const unsigned              mCodeUnitSize;
    24    
    2522};
    2623
    2724class ParabixCharacterClassKernelBuilder final : public pablo::PabloKernel {
    2825public:
    29     ParabixCharacterClassKernelBuilder(const std::unique_ptr<KernelBuilder> & b, std::string ccSetName, const std::vector<re::CC *> & charClasses, unsigned codeUnitSize);
     26    ParabixCharacterClassKernelBuilder(const std::unique_ptr<KernelBuilder> & b, std::string ccSetName, const std::vector<re::CC *> & charClasses, unsigned codeUnitWidth);
    3027protected:
    3128    void generatePabloMethod() override;
  • icGREP/icgrep-devel/icgrep/pablo/builder.hpp

    r5889 r5935  
    139139
    140140    PabloAST * createRepeat(const int64_t fieldWidth, const int64_t value) {
    141         return createRepeat(mPb->getInteger(fieldWidth), mPb->getInteger(value));
     141        std::stringstream name;
     142        name << "repeating<" << fieldWidth << ">(" << std::hex << value << ")";
     143        return createRepeat(mPb->getInteger(fieldWidth), mPb->getInteger(value), name.str());
    142144    }
    143145
  • icGREP/icgrep-devel/icgrep/re/collect_ccs.cpp

    r5934 r5935  
    1212#include <re/re_assertion.h>
    1313#include <cc/alphabet.h>
     14#include <re/re_memoizer.hpp>
     15
    1416#include <boost/container/flat_set.hpp>
    1517#include <llvm/Support/raw_ostream.h>
     
    1921namespace re {
    2022   
    21 struct SetCollector {
     23struct SetCollector : private Memoizer {
    2224    void collect(RE * const re);
    2325public:
    2426    const cc::Alphabet * alphabet;
    2527    std::vector<CC *> theSets;
    26     boost::container::flat_set<const RE *>  Visited;
    2728    std::set<Name *> ignoredExternals;
    2829};
     
    3031void SetCollector::collect(RE * const re) {
    3132    assert ("RE object cannot be null!" && re);
    32     if (Visited.insert(re).second) {
    33         if (CC * cc = dyn_cast<CC>(re)) {
    34             if (cc->getAlphabet() == alphabet) {
    35                 const auto index = find(theSets.begin(), theSets.end(), cc) - theSets.begin();
    36                 if (index == theSets.size()) theSets.push_back(cc);
     33    if (CC * cc = dyn_cast<CC>(re)) {
     34        if (cc->getAlphabet() == alphabet) {
     35            if (find(cc) == end()) {
     36                cc = memoize(cc);
     37                theSets.push_back(cc);
    3738            }
    38         } else if (isa<Name>(re)) {
    39             if (ignoredExternals.find(cast<Name>(re)) != ignoredExternals.end()) return;
    40             auto def = cast<Name>(re)->getDefinition();
    41             if (def != nullptr)
    42                 collect(def);
    43         } else if (isa<Seq>(re)) {
    44             for (auto item : *cast<Seq>(re)) {
    45                 collect(item);
    46             }
    47         } else if (isa<Alt>(re)) {
    48             for (auto item : *cast<Alt>(re)) {
    49                 collect(item);
    50             }
    51         } else if (isa<Rep>(re)) {
    52             collect(cast<Rep>(re)->getRE());
    53         } else if (isa<Assertion>(re)) {
    54             collect(cast<Assertion>(re)->getAsserted());
    55         } else if (isa<Diff>(re)) {
    56             collect(cast<Diff>(re)->getLH());
    57             collect(cast<Diff>(re)->getRH());
    58         } else if (isa<Intersect>(re)) {
    59             collect(cast<Intersect>(re)->getLH());
    60             collect(cast<Intersect>(re)->getRH());
    6139        }
     40    } else if (isa<Name>(re)) {
     41        if (ignoredExternals.find(cast<Name>(re)) != ignoredExternals.end()) return;
     42        auto def = cast<Name>(re)->getDefinition();
     43        if (def != nullptr)
     44            collect(def);
     45    } else if (isa<Seq>(re)) {
     46        for (auto item : *cast<Seq>(re)) {
     47            collect(item);
     48        }
     49    } else if (isa<Alt>(re)) {
     50        for (auto item : *cast<Alt>(re)) {
     51            collect(item);
     52        }
     53    } else if (isa<Rep>(re)) {
     54        collect(cast<Rep>(re)->getRE());
     55    } else if (isa<Assertion>(re)) {
     56        collect(cast<Assertion>(re)->getAsserted());
     57    } else if (isa<Diff>(re)) {
     58        collect(cast<Diff>(re)->getLH());
     59        collect(cast<Diff>(re)->getRH());
     60    } else if (isa<Intersect>(re)) {
     61        collect(cast<Intersect>(re)->getLH());
     62        collect(cast<Intersect>(re)->getRH());
    6263    }
    6364}
Note: See TracChangeset for help on using the changeset viewer.