Ignore:
Timestamp:
Mar 12, 2018, 7:22:06 AM (16 months ago)
Author:
cameron
Message:

Initial deployment of bytegrep kernel in icgrep

Location:
icGREP/icgrep-devel/icgrep/kernels
Files:
3 edited

Legend:

Unmodified
Added
Removed
  • icGREP/icgrep-devel/icgrep/kernels/cc_kernel.cpp

    r5872 r5902  
    88#include <cc/cc_compiler.h>
    99#include <kernels/kernel_builder.h>
     10#include <llvm/Support/raw_ostream.h>
    1011
    1112using namespace cc;
     
    2324, mCharClasses(charClasses)
    2425, mCodeUnitSize(codeUnitSize) {
    25 
     26    if (codeUnitSize > 4) errs() << "codeUnitsize of " << codeUnitSize << " too large!\n";
    2627}
    2728
  • icGREP/icgrep-devel/icgrep/kernels/grep_kernel.cpp

    r5900 r5902  
    77#include <boost/uuid/sha1.hpp>
    88#include <re/printer_re.h>
     9#include <re/re_cc.h>
     10#include <re/re_name.h>
    911#include <re/re_toolchain.h>
    1012#include <re/re_reverse.h>
     
    2426#include <cc/multiplex_CCs.h>
    2527#include <re/re_compiler.h>
     28#include <UCD/ucd_compiler.hpp>
    2629#include <llvm/Support/raw_ostream.h>
    2730
     
    4043             digest[0], digest[1], digest[2], digest[3], digest[4]);
    4144    return std::string(buffer);
     45}
     46
     47
     48UnicodeLineBreakKernel::UnicodeLineBreakKernel(const std::unique_ptr<kernel::KernelBuilder> & kb)
     49: PabloKernel(kb,
     50              "UTF8_LB",
     51              {Binding{kb->getStreamSetTy(8), "basis"}, Binding{kb->getStreamSetTy(1), "lf", FixedRate(), LookAhead(1)}},
     52              {Binding{kb->getStreamSetTy(1, 1), "UTF8_LB", FixedRate()}}) {
     53}
     54
     55void UnicodeLineBreakKernel::generatePabloMethod() {
     56        PabloBuilder pb(getEntryScope());
     57        cc::Parabix_CC_Compiler ccc(getEntryScope(), getInputStreamSet("basis"));
     58        UCD::UCDCompiler ucdCompiler(ccc);
     59   
     60    Name * breakChars = re::makeName("breakChars", makeCC(makeCC(makeCC(0x0A, 0x0D), makeCC(0x85)), makeCC(0x2028,0x2029)));
     61    UCD::UCDCompiler::NameMap nameMap;
     62    nameMap.emplace(breakChars, nullptr);
     63    ucdCompiler.generateWithDefaultIfHierarchy(nameMap, pb);
     64    auto f = nameMap.find(breakChars);
     65    if (f == nameMap.end()) llvm::report_fatal_error("UnicodeLineBreakKernel compilation failure");
     66    PabloAST * breakStream = f-> second;
     67    PabloAST * const LF = pb.createExtract(getInput(1), pb.getInteger(0), "LF");
     68    PabloAST * const CR = ccc.compileCC(makeByte(0x0D));
     69    Var * const CR_before_LF = pb.createVar("CR_before_LFCR_before_LF", pb.createZeroes());
     70    auto crb = pb.createScope();
     71    pb.createIf(CR, crb);
     72    PabloAST * const lookaheadLF = crb.createLookahead(LF, 1, "lookaheadLF");
     73    crb.createAssign(CR_before_LF, crb.createAnd(CR, lookaheadLF));
     74    breakStream = pb.createXor(breakStream, CR_before_LF);  // Remove CR_before_LF from breakStream
     75    Var * const UTF8_LB = getOutputStreamVar("UTF8_LB");
     76    pb.createAssign(pb.createExtract(UTF8_LB, pb.getInteger(0)), breakStream);
    4277}
    4378
     
    242277}
    243278
     279
     280ByteGrepSignature::ByteGrepSignature(RE * re)
     281: mRE(re)
     282, mSignature(Printer_RE::PrintRE(re) ) {
     283}
     284
     285ByteGrepKernel::ByteGrepKernel(const std::unique_ptr<kernel::KernelBuilder> & b, RE * const re, std::vector<std::string> externals)
     286: ByteGrepSignature(re)
     287, PabloKernel(b, "bBc" + sha1sum(mSignature),
     288              // inputs
     289{Binding{b->getStreamSetTy(1, 8), "byteData"}},
     290              // output
     291{Binding{b->getStreamSetTy(1, 1), "matches", FixedRate(), Add1()}})
     292, mExternals(externals) {
     293    for (auto & e : externals) {
     294        mStreamSetInputs.push_back(Binding{b->getStreamSetTy(1, 1), e});
     295    }
     296}
     297
     298std::string ByteGrepKernel::makeSignature(const std::unique_ptr<kernel::KernelBuilder> &) {
     299    return mSignature;
     300}
     301
     302
     303void ByteGrepKernel::generatePabloMethod() {
     304    PabloBuilder pb(getEntryScope());
     305    PabloAST * u8bytes = pb.createExtract(getInput(0), pb.getInteger(0));
     306    cc::Direct_CC_Compiler dcc(getEntryScope(), u8bytes);
     307    RE_Compiler re_byte_compiler(getEntryScope(), dcc);
     308    for (auto & e : mExternals) {
     309        re_byte_compiler.addPrecompiled(e, pb.createExtract(getInputStreamVar(e), pb.getInteger(0)));
     310    }
     311    PabloAST * const matches = re_byte_compiler.compile(mRE);
     312   
     313    Var * const output = getOutputStreamVar("matches");
     314    pb.createAssign(pb.createExtract(output, pb.getInteger(0)), matches);
     315}
     316
    244317// Helper to compute stream set inputs to pass into PabloKernel constructor.
    245318inline std::vector<Binding> byteBitGrepInputs(const std::unique_ptr<kernel::KernelBuilder> & b,
    246                                          const std::vector<std::string> & externals) {
     319                                              const std::vector<std::string> & externals) {
    247320    std::vector<Binding> streamSetInputs = {
    248321        Binding{b->getStreamSetTy(1, 8), "bytedata"},
     
    254327}
    255328
    256 
    257329ByteBitGrepSignature::ByteBitGrepSignature(RE * prefix, RE * suffix)
    258330: mPrefixRE(prefix)
    259331, mSuffixRE(suffix)
    260332, mSignature(Printer_RE::PrintRE(mPrefixRE) + Printer_RE::PrintRE(mSuffixRE) ) {
    261    
    262333}
    263334
  • icGREP/icgrep-devel/icgrep/kernels/grep_kernel.h

    r5889 r5902  
    1414
    1515   
     16class UnicodeNonFinalKernel : public pablo::PabloKernel {
     17public:
     18    UnicodeNonFinalKernel(const std::unique_ptr<kernel::KernelBuilder> & kb);
     19    bool isCachable() const override { return true; }
     20    bool hasSignature() const override { return false; }
     21protected:
     22    void generatePabloMethod() override;
     23};
     24
     25class UnicodeLineBreakKernel : public pablo::PabloKernel {
     26public:
     27    UnicodeLineBreakKernel(const std::unique_ptr<kernel::KernelBuilder> & kb);
     28    bool isCachable() const override { return true; }
     29    bool hasSignature() const override { return false; }
     30protected:
     31    void generatePabloMethod() override;
     32};
     33
    1634class RequiredStreams_UTF8 : public pablo::PabloKernel {
    1735public:
     
    5270};
    5371
     72struct ByteGrepSignature {
     73    ByteGrepSignature(re::RE * re);
     74protected:
     75    re::RE * const  mRE;
     76    std::string     mSignature;
     77};
     78
     79
     80class ByteGrepKernel : public ByteGrepSignature, public pablo::PabloKernel {
     81public:
     82    ByteGrepKernel(const std::unique_ptr<kernel::KernelBuilder> & iBuilder, re::RE * const re, std::vector<std::string> externals = {});
     83    std::string makeSignature(const std::unique_ptr<kernel::KernelBuilder> & iBuilder) override;
     84    bool isCachable() const override { return true; }
     85protected:
     86    void generatePabloMethod() override;
     87    std::vector<std::string> mExternals;
     88};
     89   
    5490struct ByteBitGrepSignature {
    5591    ByteBitGrepSignature(re::RE * prefix, re::RE * suffix);
Note: See TracChangeset for help on using the changeset viewer.