Ignore:
Timestamp:
Feb 25, 2018, 12:38:51 PM (14 months ago)
Author:
cameron
Message:

Grapheme Cluster Break kernel

File:
1 edited

Legend:

Unmodified (old and new line numbers both shown)
Added (new line number only)
Removed (old line number only)
  • icGREP/icgrep-devel/icgrep/grep_engine.cpp

    r5867 r5881  
    44 *  icgrep is a trademark of International Characters.
    55 */
    6 
     6#include <set>
    77#include "grep_engine.h"
    88#include "grep_interface.h"
     
    1313#include <kernels/cc_kernel.h>
    1414#include <kernels/grep_kernel.h>
     15#include <kernels/grapheme_kernel.h>
    1516#include <kernels/linebreak_kernel.h>
    1617#include <kernels/streams_merge.h>
     
    2324#include <pablo/pablo_kernel.h>
    2425#include <re/re_cc.h>
     26#include <re/re_name.h>
    2527#include <re/casing.h>
    2628#include <re/exclude_CC.h>
     
    5355static cl::opt<int> Threads("t", cl::desc("Total number of threads."), cl::init(2));
    5456static cl::opt<bool> PabloTransposition("enable-pablo-s2p", cl::desc("Enable experimental pablo transposition."));
    55 static cl::opt<bool> CC_Multiplexing("CC-multiplexing", cl::desc("Enable CC multiplexing."), cl::init(true));
     57static cl::opt<bool> CC_Multiplexing("CC-multiplexing", cl::desc("Enable CC multiplexing."), cl::init(false));
    5658
    5759namespace grep {
     
    147149        REs[i] = resolveModesAndExternalSymbols(REs[i]);
    148150        REs[i] = excludeUnicodeLineBreak(REs[i]);
     151        //re::Name * unicodeLB = re::makeName("UTF8_LB", re::Name::Type::Unicode);
     152        //unicodeLB->setDefinition(re::makeCC(0x0A));
     153        //REs[i] = resolveAnchors(REs[i], unicodeLB);
    149154        REs[i] = regular_expression_passes(REs[i]);
     155        bool hasGCB = hasGraphemeClusterBoundary(REs[i]);
     156        StreamSetBuffer * GCB_stream = nullptr;
     157        std::vector<std::string> externalStreamNames = std::vector<std::string>{"UTF8_LB", "UTF8_nonfinal"};
     158        std::vector<StreamSetBuffer *> icgrepInputSets = {BasisBits, LineBreakStream, RequiredStreams};
     159        if (hasGCB) {
     160            GCB_stream = mGrepDriver->addBuffer<CircularBuffer>(idb, idb->getStreamSetTy(1, 1), baseBufferSize);
     161            kernel::Kernel * gcbK = mGrepDriver->addKernelInstance<kernel::GraphemeClusterBreakKernel>(idb);
     162            mGrepDriver->makeKernelCall(gcbK, {BasisBits, RequiredStreams}, {GCB_stream});
     163            externalStreamNames.push_back("\\b{g}");
     164            icgrepInputSets.push_back(GCB_stream);
     165        }
    150166        if (CC_Multiplexing) {
    151             const auto UnicodeSets = re::collectUnicodeSets(REs[i]);
     167            const auto UnicodeSets = re::collectUnicodeSets(REs[i], std::set<re::Name *>({re::makeZeroWidth("\\b{g}")}));
    152168            StreamSetBuffer * const MatchResults = mGrepDriver->addBuffer<CircularBuffer>(idb, idb->getStreamSetTy(1, 1), baseBufferSize);
    153169            if (UnicodeSets.size() <= 1) {
    154                 kernel::Kernel * icgrepK = mGrepDriver->addKernelInstance<kernel::ICGrepKernel>(idb, REs[i]);
    155                 mGrepDriver->makeKernelCall(icgrepK, {BasisBits, LineBreakStream, RequiredStreams}, {MatchResults});
     170                kernel::Kernel * icgrepK = mGrepDriver->addKernelInstance<kernel::ICGrepKernel>(idb, REs[i], externalStreamNames);
     171                mGrepDriver->makeKernelCall(icgrepK, icgrepInputSets, {MatchResults});
    156172                MatchResultsBufs[i] = MatchResults;
    157173            } else {
     
    165181//                kernel::Kernel * ccK = mGrepDriver->addKernelInstance<kernel::CharClassesKernel>(idb, std::move(mpx_basis), true);
    166182//                mGrepDriver->makeKernelCall(ccK, {ByteStream}, {CharClasses});
    167                 kernel::Kernel * icgrepK = mGrepDriver->addKernelInstance<kernel::ICGrepKernel>(idb, REs[i], std::vector<cc::Alphabet *>{mpx.get()});
    168                 mGrepDriver->makeKernelCall(icgrepK, {BasisBits, LineBreakStream, RequiredStreams, CharClasses}, {MatchResults});
     183                kernel::Kernel * icgrepK = mGrepDriver->addKernelInstance<kernel::ICGrepKernel>(idb, REs[i], externalStreamNames, std::vector<cc::Alphabet *>{mpx.get()});
     184                icgrepInputSets.push_back(CharClasses);
     185                mGrepDriver->makeKernelCall(icgrepK, icgrepInputSets, {MatchResults});
    169186                MatchResultsBufs[i] = MatchResults;
    170187            }
    171188        } else {
    172189            StreamSetBuffer * MatchResults = mGrepDriver->addBuffer<CircularBuffer>(idb, idb->getStreamSetTy(1, 1), baseBufferSize);
    173             kernel::Kernel * icgrepK = mGrepDriver->addKernelInstance<kernel::ICGrepKernel>(idb, REs[i]);
    174             mGrepDriver->makeKernelCall(icgrepK, {BasisBits, LineBreakStream, RequiredStreams}, {MatchResults});
     190            kernel::Kernel * icgrepK = mGrepDriver->addKernelInstance<kernel::ICGrepKernel>(idb, REs[i], externalStreamNames);
     191            mGrepDriver->makeKernelCall(icgrepK, icgrepInputSets, {MatchResults});
    175192            MatchResultsBufs[i] = MatchResults;
    176193        }
Note: See TracChangeset for help on using the changeset viewer.