source: icGREP/icgrep-devel/icgrep/kernels/grapheme_kernel.cpp @ 6161

Last change on this file since 6161 was 6161, checked in by cameron, 7 months ago

Simplify Unicode name and anchor resolution, excludeCC

File size: 1.9 KB
Line 
1/*
2 *  Copyright (c) 2018 International Characters.
3 *  This software is licensed to the public under the Open Software License 3.0.
4 */
5
6#include "grapheme_kernel.h"
7#include <re/re_toolchain.h>
8#include <re/re_name.h>
9#include <cc/cc_compiler.h>         // for CC_Compiler
10#include <UCD/ucd_compiler.hpp>
11#include <re/re_compiler.h>
12#include <re/grapheme_clusters.h>
13#include <re/re_name_gather.h>
14#include <re/re_name_resolve.h>
15#include <re/to_utf8.h>
16#include <pablo/pablo_toolchain.h>
17#include <kernels/kernel_builder.h>
18#include <pablo/builder.hpp>
19
20using namespace kernel;
21using namespace pablo;
22
23
24GraphemeClusterBreakKernel::GraphemeClusterBreakKernel(const std::unique_ptr<kernel::KernelBuilder> & iBuilder)
25: PabloKernel(iBuilder,
26              "gcb",
27              {Binding{iBuilder->getStreamSetTy(8), "basis"}, Binding{iBuilder->getStreamSetTy(1), "nonFinal"}},
28              {Binding{iBuilder->getStreamSetTy(1, 1), "\\b{g}", FixedRate(), Add1()}}) {
29}
30
31void GraphemeClusterBreakKernel::generatePabloMethod() {
32    PabloBuilder pb(getEntryScope());
33    cc::Parabix_CC_Compiler ccc(getEntryScope(), getInputStreamSet("basis"));
34    UCD::UCDCompiler ucdCompiler(ccc);
35    re::RE_Compiler re_compiler(getEntryScope(), ccc);
36    re::RE * GCB = re::generateGraphemeClusterBoundaryRule();
37    std::set<re::Name *> externals;
38    re::gatherUnicodeProperties(GCB, externals);
39    UCD::UCDCompiler::NameMap nameMap;
40    for (auto & name : externals) {
41        nameMap.emplace(name, nullptr);
42    }
43    GCB = resolveUnicodeNames(GCB);
44    ucdCompiler.generateWithDefaultIfHierarchy(nameMap, pb);
45    re_compiler.addPrecompiled("UTF8_nonfinal", pb.createExtract(getInputStreamVar("nonFinal"), pb.getInteger(0)));
46    PabloAST * const gcb = re_compiler.compile(GCB);
47    Var * const breaks = getOutputStreamVar("\\b{g}");
48    pb.createAssign(pb.createExtract(breaks, pb.getInteger(0)), gcb);
49}
50
Note: See TracBrowser for help on using the repository browser.