Ignore:
Timestamp:
Mar 29, 2018, 11:24:15 AM (18 months ago)
Author:
cameron
Message:

Multithreaded simple RE mode initial check-in

Location:
icGREP/icgrep-devel/icgrep
Files:
2 added
4 edited
2 moved

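Legend:

Unmodified (prefixed with both the old and the new line number)
Added (prefixed with the new line number only)
Removed (prefixed with the old line number only)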
  • icGREP/icgrep-devel/icgrep/CMakeLists.txt

    r5921 r5934  
    97 97 # RegExpADT is the core library for representing, parsing and printing regular expressions
    98 98 add_library(RegExpADT re/re_rep.cpp re/re_diff.cpp re/re_intersect.cpp re/re_range.cpp re/re_assertion.cpp re/printer_re.cpp re/re_parser_pcre.cpp re/re_parser_ere.cpp re/re_parser_bre.cpp re/re_parser_prosite.cpp re/parse_fixed_strings.cpp)
    99 add_library(RegExpCompiler re/casing.cpp re/to_utf8.cpp re/re_parser.cpp re/re_memoizer.cpp re/re_nullable.cpp re/re_simplifier.cpp re/re_star_normal.cpp re/re_minimizer.cpp re/re_local.cpp re/re_compiler.cpp re/re_analysis.cpp re/re_toolchain.cpp re/re_name_resolve.cpp re/re_name_gather.cpp re/re_collect_unicodesets.cpp re/re_multiplex.cpp re/re_utility.cpp re/grapheme_clusters.cpp re/exclude_CC.cpp)
     99 add_library(RegExpCompiler re/casing.cpp re/to_utf8.cpp re/re_parser.cpp re/re_memoizer.cpp re/re_nullable.cpp re/re_simplifier.cpp re/re_star_normal.cpp re/re_minimizer.cpp re/re_local.cpp re/re_compiler.cpp re/re_analysis.cpp re/re_toolchain.cpp re/re_name_resolve.cpp re/re_name_gather.cpp re/collect_ccs.cpp re/re_multiplex.cpp re/re_utility.cpp re/grapheme_clusters.cpp re/exclude_CC.cpp re/replaceCC.cpp)
    100 100 add_library(UCDlib UCD/CaseFolding.cpp utf8_encoder.cpp utf16_encoder.cpp UCD/ucd_compiler.cpp UCD/PropertyObjects.cpp UCD/resolve_properties.cpp)
    101 101 add_library(GrepEngine  ${GREP_CORE_SRC} grep/grep_engine.cpp kernels/cc_kernel.cpp kernels/cc_scan_kernel.cpp kernels/charclasses.cpp kernels/streams_merge.cpp kernels/until_n.cpp kernels/UCD_property_kernel.cpp kernels/grapheme_kernel.cpp)
  • icGREP/icgrep-devel/icgrep/cc/multiplex_CCs.cpp

    r5836 r5934  
    31 31 //
    32 32
    33 std::map<UCD::codepoint_t, boost::dynamic_bitset<>> computeBreakpoints(const std::vector<const re::CC *> & CCs) {
     33 std::map<UCD::codepoint_t, boost::dynamic_bitset<>> computeBreakpoints(const std::vector<re::CC *> & CCs) {
    34 34     std::map<UCD::codepoint_t, boost::dynamic_bitset<>> breakpoints;
    35 35     for (unsigned i = 0; i < CCs.size(); i++) {
     
    52 52 }
    53 53
    54 void doMultiplexCCs(const std::vector<const re::CC *> & CCs,
     54 void doMultiplexCCs(const std::vector<re::CC *> & CCs,
    55 55                     std::vector<std::vector<unsigned>> & exclusiveSetIDs,
    56 56                     std::vector<re::CC *> & multiplexedCCs) {
     
    113 113
    114 114
    115 MultiplexedAlphabet::MultiplexedAlphabet(std::string alphabetName, const std::vector<const re::CC *> CCs)
     115 MultiplexedAlphabet::MultiplexedAlphabet(std::string alphabetName, const std::vector<re::CC *> CCs)
    116 116     : Alphabet(alphabetName, ClassTypeId::MultiplexedAlphabet), mUnicodeSets(CCs) {
    117 117         if (CCs.size() > 0) {
  • icGREP/icgrep-devel/icgrep/cc/multiplex_CCs.h

    r5836 r5934  
    15 15 class MultiplexedAlphabet : public Alphabet {
    16 16 public:
    17     MultiplexedAlphabet(std::string alphabetName, const std::vector<const re::CC *> CCs);
     17     MultiplexedAlphabet(std::string alphabetName, const std::vector<re::CC *> CCs);
    18 18     static inline bool classof(const Alphabet * a) {
    19 19         return a->getClassTypeId() == ClassTypeId::MultiplexedAlphabet;
     
    40 40 private:
    41 41     const Alphabet * mSourceAlphabet;
    42     const std::vector<const re::CC *> mUnicodeSets;
     42     const std::vector<re::CC *> mUnicodeSets;
    43 43     std::vector<std::vector<unsigned>> mExclusiveSetIDs;
    44 44     std::vector<re::CC *> mMultiplexedCCs;
  • icGREP/icgrep-devel/icgrep/grep/grep_engine.cpp

    r5932 r5934  
    24 24 #include <kernels/kernel_builder.h>
    25 25 #include <pablo/pablo_kernel.h>
     26 #include <cc/alphabet.h>
    26 27 #include <re/re_cc.h>
    27 28 #include <re/re_name.h>
     
    34 35 #include <re/re_name_resolve.h>
    35 36 #include <re/re_name_gather.h>
    36 #include <re/re_collect_unicodesets.h>
     37 #include <re/collect_ccs.h>
     38 #include <re/replaceCC.h>
    37 39 #include <re/re_multiplex.h>
    38 40 #include <re/grapheme_clusters.h>
     
    61 63 static cl::opt<bool> CC_Multiplexing("CC-multiplexing", cl::desc("Enable CC multiplexing."), cl::init(false));
    62 64 static cl::opt<bool> PropertyKernels("enable-property-kernels", cl::desc("Enable Unicode property kernels."), cl::init(false));
    63 
     65 static cl::opt<bool> MultithreadedSimpleRE("enable-simple-RE-kernels", cl::desc("Enable individual CC kernels for simple REs."), cl::init(false));
    64 66 const unsigned DefaultByteCClimit = 6;
    65 67
     
    267 269     }
    268 270     if (isSimple && byteTestsWithinLimit(mREs[0], ByteCClimit)) {
     271         std::vector<std::string> externalStreamNames;
     272         std::vector<StreamSetBuffer *> icgrepInputSets = {ByteStream};
     273         if (MultithreadedSimpleRE) {
     274             auto CCs = re::collectCCs(mREs[0], &cc::Byte);
     275             for (auto cc : CCs) {
     276                 auto ccName = makeName(cc);
     277                 mREs[0] = re::replaceCC(mREs[0], cc, ccName);
     278                 std::string ccNameStr = ccName->getFullName();
     279                 errs () << "Replacing: " << ccNameStr << "\n";
     280                 StreamSetBuffer * ccStream = mGrepDriver->addBuffer<CircularBuffer>(idb, idb->getStreamSetTy(1, 1), baseBufferSize);
     281                 kernel::Kernel * ccK = mGrepDriver->addKernelInstance<kernel::DirectCharacterClassKernelBuilder>(idb, ccNameStr, std::vector<re::CC *>{cc}, 1);
     282                 mGrepDriver->makeKernelCall(ccK, {ByteStream}, {ccStream});
     283                 externalStreamNames.push_back(ccNameStr);
     284                 icgrepInputSets.push_back(ccStream);
     285             }
     286         }
    269 287         StreamSetBuffer * MatchResults = mGrepDriver->addBuffer<CircularBuffer>(idb, idb->getStreamSetTy(1, 1), baseBufferSize);
    270         kernel::Kernel * icgrepK = mGrepDriver->addKernelInstance<kernel::ByteGrepKernel>(idb, mREs[0]);
    271         mGrepDriver->makeKernelCall(icgrepK, {ByteStream}, {MatchResults});
     288         kernel::Kernel * icgrepK = mGrepDriver->addKernelInstance<kernel::ByteGrepKernel>(idb, mREs[0], externalStreamNames);
     289         mGrepDriver->makeKernelCall(icgrepK, icgrepInputSets, {MatchResults});
    272 290         MatchResultsBufs[0] = MatchResults;
    273 291         kernel::Kernel * breakK = mGrepDriver->addKernelInstance<kernel::DirectCharacterClassKernelBuilder>(idb, "breakCC", std::vector<re::CC *>{mBreakCC}, 1);
     
    353 371             }
    354 372             if (CC_Multiplexing) {
    355                 const auto UnicodeSets = re::collectUnicodeSets(mREs[i], std::set<re::Name *>({re::makeZeroWidth("\\b{g}")}));
     373                 const auto UnicodeSets = re::collectCCs(mREs[i], &cc::Unicode, std::set<re::Name *>({re::makeZeroWidth("\\b{g}")}));
    356 374                 StreamSetBuffer * const MatchResults = mGrepDriver->addBuffer<CircularBuffer>(idb, idb->getStreamSetTy(1, 1), baseBufferSize);
    357 375                 if (UnicodeSets.size() <= 1) {
  • icGREP/icgrep-devel/icgrep/re/collect_ccs.cpp

    r5933 r5934  
    1 #include "re_collect_unicodesets.h"
     1 #include "collect_ccs.h"
     2 #include <cc/alphabet.h>
    2 3 #include <re/re_name.h>
    3 4 #include <re/re_any.h>
     
    21 22     void collect(RE * const re);
    22 23 public:
    23     std::vector<const CC *> UnicodeSets;
     24     const cc::Alphabet * alphabet;
     25     std::vector<CC *> theSets;
    24 26     boost::container::flat_set<const RE *>  Visited;
    25 27     std::set<Name *> ignoredExternals;
     
    30 32     if (Visited.insert(re).second) {
    31 33         if (CC * cc = dyn_cast<CC>(re)) {
    32             if (cc->getAlphabet() == &cc::Unicode) {
    33                 const auto index = find(UnicodeSets.begin(), UnicodeSets.end(), cc) - UnicodeSets.begin();
    34                 if (index == UnicodeSets.size()) UnicodeSets.push_back(cc);
     34             if (cc->getAlphabet() == alphabet) {
     35                 const auto index = find(theSets.begin(), theSets.end(), cc) - theSets.begin();
     36                 if (index == theSets.size()) theSets.push_back(cc);
    35 37             }
    36 38         } else if (isa<Name>(re)) {
     
    61 63 }
    62 64
    63 std::vector<const CC *> collectUnicodeSets(RE * const re, std::set<Name *> external) {
     65 std::vector<CC *> collectCCs(RE * const re, const cc::Alphabet * a, std::set<Name *> external) {
    64 66     SetCollector collector;
     67     collector.alphabet = a;
    65 68     collector.ignoredExternals = external;
    66 69     collector.collect(re);
    67     return collector.UnicodeSets;
     70     return collector.theSets;
    68 71 }
    69 72
  • icGREP/icgrep-devel/icgrep/re/collect_ccs.h

    r5933 r5934  
    1 #ifndef RE_COLLECT_UNICODESETS_H
    2 #define RE_COLLECT_UNICODESETS_H
     1 #ifndef COLLECT_CCS_H
     2 #define COLLECT_CCS_H
    3 3
    4 4 #include <vector>
    5 5 #include <set>
    6 6
     7 namespace cc {class Alphabet;}
    7 8 namespace re {
    8 9
     
    11 12     class Name;
    12 13
    13     std::vector<const CC *> collectUnicodeSets(RE * const re, std::set<Name *> external = {});
     14     std::vector<CC *> collectCCs(RE * const re, const cc::Alphabet * a, std::set<Name *> external = {});
    14 15
    15 16 }
Note: See TracChangeset for help on using the changeset viewer.