Changeset 5887


Ignore:
Timestamp:
Mar 3, 2018, 1:30:02 PM (12 months ago)
Author:
cameron
Message:

Separate compilation of Unicode property kernels

Location:
icGREP/icgrep-devel/icgrep
Files:
2 added
7 edited

Legend:

Unmodified
Added
Removed
  • icGREP/icgrep-devel/icgrep/CMakeLists.txt

    r5881 r5887  
    112112target_link_libraries (RegExpCompiler UCDlib CCADT RegExpADT)
    113113
    114 add_executable(icgrep icgrep.cpp grep_interface.cpp grep_engine.cpp kernels/u8u32_kernel.cpp kernels/delmask_kernel.cpp kernels/cc_kernel.cpp kernels/cc_scan_kernel.cpp kernels/charclasses.cpp kernels/linebreak_kernel.cpp kernels/streams_merge.cpp kernels/grep_kernel.cpp kernels/until_n.cpp kernels/grapheme_kernel.cpp)
     114add_executable(icgrep icgrep.cpp grep_interface.cpp grep_engine.cpp kernels/u8u32_kernel.cpp kernels/delmask_kernel.cpp kernels/cc_kernel.cpp kernels/cc_scan_kernel.cpp kernels/charclasses.cpp kernels/linebreak_kernel.cpp kernels/streams_merge.cpp kernels/grep_kernel.cpp kernels/until_n.cpp kernels/UCD_property_kernel.cpp kernels/grapheme_kernel.cpp)
    115115add_executable(u8u16 u8u16.cpp)
    116116add_executable(base64 base64.cpp kernels/radix64.cpp)
  • icGREP/icgrep-devel/icgrep/grep_engine.cpp

    r5881 r5887  
    1313#include <kernels/cc_kernel.h>
    1414#include <kernels/grep_kernel.h>
     15#include <kernels/UCD_property_kernel.h>
    1516#include <kernels/grapheme_kernel.h>
    1617#include <kernels/linebreak_kernel.h>
     
    3031#include <toolchain/toolchain.h>
    3132#include <re/re_name_resolve.h>
     33#include <re/re_name_gather.h>
    3234#include <re/re_collect_unicodesets.h>
    3335#include <re/re_multiplex.h>
     
    5658static cl::opt<bool> PabloTransposition("enable-pablo-s2p", cl::desc("Enable experimental pablo transposition."));
    5759static cl::opt<bool> CC_Multiplexing("CC-multiplexing", cl::desc("Enable CC multiplexing."), cl::init(false));
     60static cl::opt<bool> PropertyKernels("enable-property-kernels", cl::desc("Enable Unicode property kernels."), cl::init(false));
    5861
    5962namespace grep {
     
    115118    const unsigned baseBufferSize = segmentSize * (MaxCountFlag > 0 ? (std::max(bufferSegments, calculateMaxCountRate(idb))) : bufferSegments);
    116119    const unsigned encodingBits = 8;
    117 
     120   
     121   
     122    //  Regular Expression Processing and Analysis Phase
     123    const auto nREs = REs.size();
     124    bool hasGCB[nREs];
     125    bool anyGCB = false;
     126   
     127    std::set<re::Name *> UnicodeProperties;
     128   
     129    for(unsigned i = 0; i < nREs; ++i) {
     130        REs[i] = resolveModesAndExternalSymbols(REs[i]);
     131        REs[i] = excludeUnicodeLineBreak(REs[i]);
     132        re::gatherUnicodeProperties(REs[i], UnicodeProperties);
     133       //re::Name * unicodeLB = re::makeName("UTF8_LB", re::Name::Type::Unicode);
     134        //unicodeLB->setDefinition(re::makeCC(0x0A));
     135        //REs[i] = resolveAnchors(REs[i], unicodeLB);
     136        REs[i] = regular_expression_passes(REs[i]);
     137        hasGCB[i] = hasGraphemeClusterBoundary(REs[i]);
     138        anyGCB |= hasGCB[i];
     139    }
     140   
    118141    StreamSetBuffer * LineFeedStream = mGrepDriver->addBuffer<CircularBuffer>(idb, idb->getStreamSetTy(1, 1), baseBufferSize);
    119142
     
    144167    mGrepDriver->makeKernelCall(requiredStreamsK, {BasisBits, LineFeedStream}, {RequiredStreams, LineBreakStream});
    145168
    146     const auto n = REs.size();
    147     std::vector<StreamSetBuffer *> MatchResultsBufs(n);
    148     for(unsigned i = 0; i < n; ++i) {
     169   
     170    std::map<std::string, StreamSetBuffer *> propertyStream;
     171    if (PropertyKernels) {
     172        for (auto p : UnicodeProperties) {
     173            auto name = p->getFullName();
     174            StreamSetBuffer * s = mGrepDriver->addBuffer<CircularBuffer>(idb, idb->getStreamSetTy(1, 1), baseBufferSize);
     175            propertyStream.emplace(std::make_pair(name, s));
     176            kernel::Kernel * propertyK = mGrepDriver->addKernelInstance<kernel::UnicodePropertyKernelBuilder>(idb, p);
     177            mGrepDriver->makeKernelCall(propertyK, {BasisBits}, {s});
     178        }
     179    }
     180    StreamSetBuffer * GCB_stream = nullptr;
     181    if (anyGCB) {
     182        GCB_stream = mGrepDriver->addBuffer<CircularBuffer>(idb, idb->getStreamSetTy(1, 1), baseBufferSize);
     183        kernel::Kernel * gcbK = mGrepDriver->addKernelInstance<kernel::GraphemeClusterBreakKernel>(idb);
     184        mGrepDriver->makeKernelCall(gcbK, {BasisBits, RequiredStreams}, {GCB_stream});
     185    }
     186
     187    std::vector<StreamSetBuffer *> MatchResultsBufs(nREs);
     188    for(unsigned i = 0; i < nREs; ++i) {
    149189        REs[i] = resolveModesAndExternalSymbols(REs[i]);
    150190        REs[i] = excludeUnicodeLineBreak(REs[i]);
     
    153193        //REs[i] = resolveAnchors(REs[i], unicodeLB);
    154194        REs[i] = regular_expression_passes(REs[i]);
    155         bool hasGCB = hasGraphemeClusterBoundary(REs[i]);
    156         StreamSetBuffer * GCB_stream = nullptr;
    157195        std::vector<std::string> externalStreamNames = std::vector<std::string>{"UTF8_LB", "UTF8_nonfinal"};
    158196        std::vector<StreamSetBuffer *> icgrepInputSets = {BasisBits, LineBreakStream, RequiredStreams};
    159         if (hasGCB) {
    160             GCB_stream = mGrepDriver->addBuffer<CircularBuffer>(idb, idb->getStreamSetTy(1, 1), baseBufferSize);
    161             kernel::Kernel * gcbK = mGrepDriver->addKernelInstance<kernel::GraphemeClusterBreakKernel>(idb);
    162             mGrepDriver->makeKernelCall(gcbK, {BasisBits, RequiredStreams}, {GCB_stream});
     197        std::set<re::Name *> UnicodeProperties;
     198        if (PropertyKernels) {
     199            re::gatherUnicodeProperties(REs[i], UnicodeProperties);
     200            for (auto p : UnicodeProperties) {
     201                auto name = p->getFullName();
     202                auto f = propertyStream.find(name);
     203                if (f == propertyStream.end()) report_fatal_error(name + " not found\n");
     204                externalStreamNames.push_back(name);
     205                icgrepInputSets.push_back(f->second);
     206            }
     207        }
     208        if (hasGCB[i]) {
    163209            externalStreamNames.push_back("\\b{g}");
    164210            icgrepInputSets.push_back(GCB_stream);
  • icGREP/icgrep-devel/icgrep/kernels/grapheme_kernel.cpp

    r5881 r5887  
    3535    re::RE_Compiler re_compiler(this, ccc);
    3636    re::RE * GCB = re::generateGraphemeClusterBoundaryRule();
    37     std::set<re::Name *> externals = re::gatherExternalNames(GCB);
     37    std::set<re::Name *> externals;
     38    re::gatherUnicodeProperties(GCB, externals);
    3839    UCD::UCDCompiler::NameMap nameMap;
    3940    for (auto & name : externals) {
  • icGREP/icgrep-devel/icgrep/re/exclude_CC.cpp

    r5847 r5887  
    6767                }
    6868                std::string cc_name = n->getName() + "--" + cc->canonicalName();
    69                 return makeName(cc_name, n->getType(), exclude_CC(defn, cc));
     69                return makeName(cc_name, Name::Type::Unicode, exclude_CC(defn, cc));
    7070                /*
    7171                return exclude_CC(defn, cc);
  • icGREP/icgrep-devel/icgrep/re/re_name.h

    r5847 r5887  
    3232    bool hasNamespace() const;
    3333    std::string getName() const;
     34    std::string getFullName() const;
    3435    Type getType() const;
    3536    RE * getDefinition() const;
     
    8788    return std::string(mName, mNameLength);
    8889}
    89    
     90
     // Returns the name qualified by its namespace, formatted as
     // "<namespace>=<name>", when hasNamespace() is true; otherwise returns
     // exactly what getName() returns.
     91inline std::string Name::getFullName() const {
     92    if (hasNamespace()) return getNamespace() + "=" + getName();
     93    else return getName();
     94}
     95
    9096inline Name::Type Name::getType() const {
    9197    return mType;
  • icGREP/icgrep-devel/icgrep/re/re_name_gather.cpp

    r5881 r5887  
    2121using namespace llvm;
    2222namespace re {
    23 struct NameGather {
    24 
    25     void gather(RE * re) {
    26         assert ("RE object cannot be null!" && re);
    27         if (isa<Name>(re)) {
    28             RE * defn = cast<Name>(re)->getDefinition();
    29             if (defn == nullptr) {
    30                 mNameSet.emplace(cast<Name>(re));
    31             }
    32         } else if (isa<Seq>(re)) {
    33             for (RE * item : *cast<Seq>(re)) {
    34                 gather(item);
    35             }
    36         } else if (isa<Alt>(re)) {
    37             for (RE * item : *cast<Alt>(re)) {
    38                 gather(item);
    39             }
    40         } else if (isa<Rep>(re)) {
    41             gather(cast<Rep>(re)->getRE());
    42         } else if (isa<Assertion>(re)) {
    43             gather(cast<Assertion>(re)->getAsserted());
    44         } else if (Range * rg = dyn_cast<Range>(re)) {
    45             gather(rg->getLo());
    46             gather(rg->getHi());
    47         } else if (isa<Diff>(re)) {
    48             gather(cast<Diff>(re)->getLH());
    49             gather(cast<Diff>(re)->getRH());
    50         } else if (isa<Intersect>(re)) {
    51             gather(cast<Intersect>(re)->getLH());
    52             gather(cast<Intersect>(re)->getRH());
    53         } else if (isa<Group>(re)) {
    54             gather(cast<Group>(re)->getRE());
     23   
     24   
     // Collects the Unicode property names referenced by a regular expression.
     //
     // Walks `re` recursively and inserts into `nameSet` every Name node whose
     // getType() is Name::Type::UnicodeProperty. The walk descends through the
     // items of Seq and Alt, the operand of Rep, Assertion and Group, both
     // bounds of Range, and both sides of Diff and Intersect; any other node
     // kind is left alone. A Name's own definition is not visited.
     // Results are accumulated into the caller-supplied set; nothing is returned.
     // NOTE(review): `re` is not checked for null here -- confirm that callers
     // never pass nullptr.
     25void gatherUnicodeProperties (RE * re, std::set<Name *> & nameSet) {
     26    if (isa<Name>(re)) {
     27        if (cast<Name>(re)->getType() == Name::Type::UnicodeProperty) {
     28            nameSet.emplace(cast<Name>(re));
    5529        }
     30    } else if (isa<Seq>(re)) {
     31        for (RE * item : *cast<Seq>(re)) {
     32            gatherUnicodeProperties(item, nameSet);
     33        }
     34    } else if (isa<Alt>(re)) {
     35        for (RE * item : *cast<Alt>(re)) {
     36            gatherUnicodeProperties(item, nameSet);
     37        }
     38    } else if (isa<Rep>(re)) {
     39        gatherUnicodeProperties(cast<Rep>(re)->getRE(), nameSet);
     40    } else if (isa<Assertion>(re)) {
     41        gatherUnicodeProperties(cast<Assertion>(re)->getAsserted(), nameSet);
     42    } else if (Range * rg = dyn_cast<Range>(re)) {
     43        gatherUnicodeProperties(rg->getLo(), nameSet);
     44        gatherUnicodeProperties(rg->getHi(), nameSet);
     45    } else if (isa<Diff>(re)) {
     46        gatherUnicodeProperties(cast<Diff>(re)->getLH(), nameSet);
     47        gatherUnicodeProperties(cast<Diff>(re)->getRH(), nameSet);
     48    } else if (isa<Intersect>(re)) {
     49        gatherUnicodeProperties(cast<Intersect>(re)->getLH(), nameSet);
     50        gatherUnicodeProperties(cast<Intersect>(re)->getRH(), nameSet);
     51    } else if (isa<Group>(re)) {
     52        gatherUnicodeProperties(cast<Group>(re)->getRE(), nameSet);
    5653    }
    57     NameGather(std::set<Name *> & nameSet)
    58     : mNameSet(nameSet) {
    59 
    60     }
    61 
    62 private:
    63 
    64     std::set<Name *> &               mNameSet;
    65 
    66 };
    67    
    68 std::set<Name *> gatherExternalNames(RE * re) {
    69     std::set<Name *> nameSet;
    70    
    71     NameGather nameGather(nameSet);
    72     nameGather.gather(re);
    73     return nameSet;
    74    
    7554}
    7655
  • icGREP/icgrep-devel/icgrep/re/re_name_gather.h

    r5881 r5887  
    99    class RE; class Name;
    1010
    11     std::set<Name *> gatherExternalNames(RE * re);
     11    void gatherUnicodeProperties (RE * r, std::set<Name *> & nameSet);
    1212
    1313}
Note: See TracChangeset for help on using the changeset viewer.