source: icGREP/icgrep-devel/icgrep/kernels/charclasses.cpp @ 5787

Last change on this file since 5787 was 5787, checked in by cameron, 9 months ago

RE parser restructuring; parsing symbolic ranges, collation and equivalence exprs

File size: 3.4 KB
Line 
1/*
2 *  Copyright (c) 2017 International Characters.
3 *  This software is licensed to the public under the Open Software License 3.0.
4 */
5
6#include "charclasses.h"
7#include <re/re_toolchain.h>
8#include <kernels/kernel_builder.h>
9#include <UCD/ucd_compiler.hpp>
10#include <cc/cc_compiler.h>
11#include <re/re_name.h>
12#include <boost/uuid/sha1.hpp>
13#include <pablo/builder.hpp>
14#include <llvm/Support/raw_ostream.h>
15
16using NameMap = UCD::UCDCompiler::NameMap;
17
18using namespace cc;
19using namespace kernel;
20using namespace pablo;
21using namespace re;
22using namespace llvm;
23using namespace UCD;
24
25inline static std::string sha1sum(const std::string & str) {
26    char buffer[41];    // 40 hex-digits and the terminating null
27    uint32_t digest[5]; // 160 bits in total
28    boost::uuids::detail::sha1 sha1;
29    sha1.process_bytes(str.c_str(), str.size());
30    sha1.get_digest(digest);
31    snprintf(buffer, sizeof(buffer), "%.8x%.8x%.8x%.8x%.8x",
32             digest[0], digest[1], digest[2], digest[3], digest[4]);
33    return std::string(buffer);
34}
35
36inline std::string signature(const std::vector<re::CC *> & ccs) {
37    if (LLVM_UNLIKELY(ccs.empty())) {
38        return "[]";
39    } else {
40        std::string tmp;
41        raw_string_ostream out(tmp);
42        char joiner = '[';
43        for (const auto & set : ccs) {
44            out << joiner;
45            set->print(out);
46            joiner = ',';
47        }
48        out << ']';
49        return out.str();
50    }
51}
52
53CharClassesSignature::CharClassesSignature(const std::vector<CC *> &ccs)
54: mSignature(signature(ccs)) {
55
56}
57
58
59CharClassesKernel::CharClassesKernel(const std::unique_ptr<kernel::KernelBuilder> & iBuilder, std::vector<CC *> && ccs)
60: CharClassesSignature(ccs)
61, PabloKernel(iBuilder,
62              "cc" + sha1sum(mSignature),
63              {Binding{iBuilder->getStreamSetTy(8), "basis"}},
64              {Binding{iBuilder->getStreamSetTy(ccs.size(), 1), "charclasses"}})
65, mCCs(std::move(ccs)) {
66
67}
68
69std::string CharClassesKernel::makeSignature(const std::unique_ptr<kernel::KernelBuilder> &) {
70    return mSignature;
71}
72
73void CharClassesKernel::generatePabloMethod() {
74    CC_Compiler ccc(this, getInput(0));
75    auto & pb = ccc.getBuilder();
76    unsigned n = mCCs.size();
77
78    NameMap nameMap;
79    std::vector<Name *> names;
80    for (unsigned i = 0; i < n; i++) {
81        Name * name = re::makeName("mpx_basis" + std::to_string(i), mCCs[i]);
82        nameMap.emplace(name, nullptr);
83        names.push_back(name);
84    }
85
86    UCD::UCDCompiler ucdCompiler(ccc);
87    if (LLVM_UNLIKELY(AlgorithmOptionIsSet(DisableIfHierarchy))) {
88        ucdCompiler.generateWithoutIfHierarchy(nameMap, pb);
89    } else {
90        ucdCompiler.generateWithDefaultIfHierarchy(nameMap, pb);
91    }
92
93    // The first UnicodeSet in the vector ccs represents the last bit of the character class basis bit streams.
94    std::reverse(names.begin(), names.end());
95    for (unsigned i = 0; i < names.size(); i++) {
96        auto t = nameMap.find(names[i]); 
97        if (t != nameMap.end()) {
98            PabloAST * const r = pb.createExtract(getOutput(0), pb.getInteger(i));
99            if (t->first->getType() == Name::Type::Byte) {
100                pb.createAssign(r, ccc.compileCC(dyn_cast<CC>(t->first->getDefinition())));
101            } else {
102                pb.createAssign(r, pb.createInFile(t->second));
103            }
104        } else {
105            throw std::runtime_error("Can't compile character classes.");
106        }
107    }
108}
109
110
Note: See TracBrowser for help on using the repository browser.