Ignore:
Timestamp:
Jul 11, 2017, 1:57:37 PM (2 years ago)
Author:
cameron
Message:

Factor out required streams into a separate kernel

Location:
icGREP/icgrep-devel/icgrep/kernels
Files:
2 edited

Legend:

Unmodified
Added
Removed
  • icGREP/icgrep-devel/icgrep/kernels/grep_kernel.cpp

    r5548 r5561  
    1111#include <kernels/kernel_builder.h>
    1212#include <pablo/builder.hpp>
     13#include <pablo/pe_ones.h>          // for Ones
     14#include <pablo/pe_var.h>           // for Var
     15#include <pablo/pe_zeroes.h>        // for Zeroes
    1316#include <pablo/boolean.h>
    1417#include <pablo/pe_count.h>
    1518#include <pablo/pe_matchstar.h>
     19#include "cc/cc_compiler.h"         // for CC_Compiler
    1620
    1721#include <llvm/Support/raw_ostream.h>
     
    3943}
    4044
     45void RequiredStreams_UTF8::generatePabloMethod() {
     46   
     47    cc::CC_Compiler ccc(this, getInput(0));
     48    auto & pb = ccc.getBuilder();
     49    Zeroes * const zero = pb.createZeroes();
     50    PabloAST * LF = ccc.compileCC("LF", makeCC(0x0A), pb);
     51    PabloAST * CR = ccc.compileCC(makeCC(0x0D));
     52
     53    Var * crlf = pb.createVar("crlf", zero);
     54    PabloBuilder crb = PabloBuilder::Create(pb);
     55    PabloAST * cr1 = crb.createAdvance(CR, 1, "cr1");
     56    crb.createAssign(crlf, crb.createAnd(cr1, LF));
     57    pb.createIf(CR, crb);
     58   
     59    Var * u8invalid = pb.createVar("u8invalid", zero);
     60    Var * valid_pfx = pb.createVar("valid_pfx", zero);
     61    Var * nonFinal = pb.createVar("nonfinal", zero);
     62    PabloAST * u8pfx = ccc.compileCC(makeCC(0xC0, 0xFF));
     63   
     64    PabloBuilder it = PabloBuilder::Create(pb);
     65
     66    pb.createIf(u8pfx, it);
     67    PabloAST * u8pfx2 = ccc.compileCC(makeCC(0xC2, 0xDF), it);
     68    PabloAST * u8pfx3 = ccc.compileCC(makeCC(0xE0, 0xEF), it);
     69    PabloAST * u8pfx4 = ccc.compileCC(makeCC(0xF0, 0xF4), it);
     70    PabloAST * u8suffix = ccc.compileCC("u8suffix", makeCC(0x80, 0xBF), it);
     71   
     72    //
     73    // Two-byte sequences
     74    Var * u8scope22 = it.createVar("u8scope22", zero);
     75    PabloBuilder it2 = PabloBuilder::Create(it);
     76    it2.createAssign(u8scope22, it2.createAdvance(u8pfx2, 1));
     77    it.createIf(u8pfx2, it2);
     78    //
     79    // Three-byte sequences
     80   
     81    Var * u8scope32 = it.createVar("u8scope32", zero);
     82    Var * u8scope3X = it.createVar("u8scope3X", zero);
     83    Var * EX_invalid = it.createVar("EX_invalid", zero);
     84    PabloBuilder it3 = PabloBuilder::Create(it);
     85    it.createIf(u8pfx3, it3);
     86    it3.createAssign(u8scope32, it3.createAdvance(u8pfx3, 1));
     87    PabloAST * u8scope33 = it3.createAdvance(u8pfx3, 2);
     88    it3.createAssign(u8scope3X, it3.createOr(u8scope32, u8scope33));
     89    PabloAST * E0_invalid = it3.createAnd(it3.createAdvance(ccc.compileCC(makeCC(0xE0), it3), 1), ccc.compileCC(makeCC(0x80, 0x9F), it3));
     90    PabloAST * ED_invalid = it3.createAnd(it3.createAdvance(ccc.compileCC(makeCC(0xED), it3), 1), ccc.compileCC(makeCC(0xA0, 0xBF), it3));
     91    it3.createAssign(EX_invalid, it3.createOr(E0_invalid, ED_invalid));
     92   
     93    //
     94    // Four-byte sequences
     95    Var * u8scope4nonfinal = it.createVar("u8scope4nonfinal", zero);
     96    Var * u8scope4X = it.createVar("u8scope4X", zero);
     97    Var * FX_invalid = it.createVar("FX_invalid", zero);
     98    PabloBuilder it4 = PabloBuilder::Create(it);
     99    it.createIf(u8pfx4, it4);
     100    PabloAST * u8scope42 = it4.createAdvance(u8pfx4, 1, "u8scope42");
     101    PabloAST * u8scope43 = it4.createAdvance(u8scope42, 1, "u8scope43");
     102    PabloAST * u8scope44 = it4.createAdvance(u8scope43, 1, "u8scope44");
     103    it4.createAssign(u8scope4nonfinal, it4.createOr(u8scope42, u8scope43));
     104    it4.createAssign(u8scope4X, it4.createOr(u8scope4nonfinal, u8scope44));
     105    PabloAST * F0_invalid = it4.createAnd(it4.createAdvance(ccc.compileCC(makeCC(0xF0), it4), 1), ccc.compileCC(makeCC(0x80, 0x8F), it4));
     106    PabloAST * F4_invalid = it4.createAnd(it4.createAdvance(ccc.compileCC(makeCC(0xF4), it4), 1), ccc.compileCC(makeCC(0x90, 0xBF), it4));
     107    it4.createAssign(FX_invalid, it4.createOr(F0_invalid, F4_invalid));
     108   
     109    //
     110    // Invalid cases
     111    PabloAST * anyscope = it.createOr(u8scope22, it.createOr(u8scope3X, u8scope4X));
     112    PabloAST * legalpfx = it.createOr(it.createOr(u8pfx2, u8pfx3), u8pfx4);
     113    //  Any scope that does not have a suffix byte, and any suffix byte that is not in
     114    //  a scope is a mismatch, i.e., invalid UTF-8.
     115    PabloAST * mismatch = it.createXor(anyscope, u8suffix);
     116    //
     117    PabloAST * EF_invalid = it.createOr(EX_invalid, FX_invalid);
     118    PabloAST * pfx_invalid = it.createXor(u8pfx, legalpfx);
     119    it.createAssign(u8invalid, it.createOr(pfx_invalid, it.createOr(mismatch, EF_invalid)));
     120    PabloAST * u8valid = it.createNot(u8invalid, "u8valid");
     121    //
     122    //
     123   
     124    it.createAssign(valid_pfx, it.createAnd(u8pfx, u8valid));
     125    it.createAssign(nonFinal, it.createAnd(it.createOr(it.createOr(u8pfx, u8scope32), u8scope4nonfinal), u8valid));
     126   
     127    PabloAST * u8single = pb.createAnd(ccc.compileCC(makeCC(0x00, 0x7F)), pb.createNot(u8invalid));
     128   
     129    Var * const required = getOutputStreamVar("required");
     130    pb.createAssign(pb.createExtract(required, pb.getInteger(0)), pb.createOr(u8single, valid_pfx, "initial"));
     131    pb.createAssign(pb.createExtract(required, pb.getInteger(1)), nonFinal);
     132    pb.createAssign(pb.createExtract(required, pb.getInteger(2)), pb.createNot(pb.createOr(nonFinal, u8invalid), "final"));
     133    pb.createAssign(pb.createExtract(required, pb.getInteger(3)), crlf);
     134}
     135
     136RequiredStreams_UTF8::RequiredStreams_UTF8(const std::unique_ptr<kernel::KernelBuilder> & kb)
     137: PabloKernel(kb, "RequiredStreams_UTF8",               
     138              {Binding{kb->getStreamSetTy(8), "basis"}},
     139              {Binding{kb->getStreamSetTy(4), "required"}},
     140              {},
     141              {}) {
     142}
     143
     144void RequiredStreams_UTF16::generatePabloMethod() {
     145   
     146    cc::CC_Compiler ccc(this, getInput(0));
     147    auto & pb = ccc.getBuilder();
     148   
     149    PabloAST * LF = ccc.compileCC("LF", makeCC(0x000A), pb);
     150    PabloAST * CR = ccc.compileCC("CR", makeCC(0x000D), pb);
     151    PabloAST * cr1 = pb.createAdvance(CR, 1, "cr1");
     152   
     153    PabloAST * u16hi_hi_surrogate = ccc.compileCC(makeCC(0xD800, 0xDBFF));    //u16hi_hi_surrogate = [\xD8-\xDB]
     154    PabloAST * u16hi_lo_surrogate = ccc.compileCC(makeCC(0xDC00, 0xDFFF));    //u16hi_lo_surrogate = [\xDC-\xDF]
     155   
     156    PabloAST * invalidTemp = pb.createAdvance(u16hi_hi_surrogate, 1, "InvalidTemp");
     157    PabloAST * u16invalid = pb.createXor(invalidTemp, u16hi_lo_surrogate, "u16invalid");
     158    PabloAST * u16valid = pb.createNot(u16invalid, "u16valid");
     159   
     160    PabloAST * u16single_temp = pb.createOr(ccc.compileCC(makeCC(0x0000, 0xD7FF)), ccc.compileCC(makeCC(0xE000, 0xFFFF)));
     161    PabloAST * u16single = pb.createAnd(u16single_temp, pb.createNot(u16invalid));
     162
     163    Var * const required = getOutputStreamVar("required");
     164    pb.createAssign(pb.createExtract(required, pb.getInteger(0)), pb.createOr(u16single, u16hi_hi_surrogate, "initial"));
     165    pb.createAssign(pb.createExtract(required, pb.getInteger(1)), pb.createAnd(u16hi_hi_surrogate, u16valid, "nonfinal"));
     166    pb.createAssign(pb.createExtract(required, pb.getInteger(2)), pb.createNot(pb.createOr(u16hi_hi_surrogate, u16invalid), "final"));
     167    pb.createAssign(pb.createExtract(required, pb.getInteger(3)), pb.createAnd(cr1, LF, "crlf"));
     168}
     169
     170RequiredStreams_UTF16::RequiredStreams_UTF16(const std::unique_ptr<kernel::KernelBuilder> & kb)
     171: PabloKernel(kb, "RequiredStreams_UTF16",               
     172              {Binding{kb->getStreamSetTy(16), "basis"}},
     173              {Binding{kb->getStreamSetTy(4), "required"}},
     174              {},
     175              {}) {
     176}
     177
     178
     179
    41180ICGrepKernel::ICGrepKernel(const std::unique_ptr<kernel::KernelBuilder> & iBuilder, RE * const re)
    42181: RegularExpressionOptimizer(re)
    43182, PabloKernel(iBuilder,
    44183              "ic" + sha1sum(mSignature),
    45               {Binding{iBuilder->getStreamSetTy(8), "basis"}, Binding{iBuilder->getStreamSetTy(1, 1), "linebreak"}},
     184              {Binding{iBuilder->getStreamSetTy(8), "basis"}, Binding{iBuilder->getStreamSetTy(1, 1), "linebreak"}, Binding{iBuilder->getStreamSetTy(4, 1), "required"}},
    46185              {Binding{iBuilder->getStreamSetTy(1, 1), "matches"}}) {
    47186
  • icGREP/icgrep-devel/icgrep/kernels/grep_kernel.h

    r5548 r5561  
    1818    std::string     mSignature;
    1919};
     20
     21
     22   
     23class RequiredStreams_UTF8 : public pablo::PabloKernel {
     24public:
     25    RequiredStreams_UTF8(const std::unique_ptr<kernel::KernelBuilder> & kb);
     26    bool isCachable() const override { return true; }
     27    bool hasSignature() const override { return false; }
     28protected:
     29    void generatePabloMethod() override;
     30};
     31
     32class RequiredStreams_UTF16 : public pablo::PabloKernel {
     33public:
     34    RequiredStreams_UTF16(const std::unique_ptr<kernel::KernelBuilder> & kb);
     35    bool isCachable() const override { return true; }
     36    bool hasSignature() const override { return false; }
     37protected:
     38    void generatePabloMethod() override;
     39};
     40
    2041
    2142class ICGrepKernel : public RegularExpressionOptimizer, public pablo::PabloKernel {
Note: See TracChangeset for help on using the changeset viewer.