source: icGREP/icgrep-devel/icgrep/kernels/charclasses.cpp @ 5805

Last change on this file since 5805 was 5805, checked in by cameron, 13 months ago

Name::Type::Byte removed in favor of cc::Byte alphabet; other cleanups

File size: 3.2 KB
Line 
1/*
2 *  Copyright (c) 2017 International Characters.
3 *  This software is licensed to the public under the Open Software License 3.0.
4 */
5
6#include "charclasses.h"
7#include <re/re_toolchain.h>
8#include <kernels/kernel_builder.h>
9#include <UCD/ucd_compiler.hpp>
10#include <cc/cc_compiler.h>
11#include <re/re_name.h>
12#include <boost/uuid/sha1.hpp>
13#include <pablo/builder.hpp>
14#include <llvm/Support/ErrorHandling.h>
15#include <llvm/Support/raw_ostream.h>
16
17using NameMap = UCD::UCDCompiler::NameMap;
18
19using namespace cc;
20using namespace kernel;
21using namespace pablo;
22using namespace re;
23using namespace llvm;
24using namespace UCD;
25
26inline static std::string sha1sum(const std::string & str) {
27    char buffer[41];    // 40 hex-digits and the terminating null
28    uint32_t digest[5]; // 160 bits in total
29    boost::uuids::detail::sha1 sha1;
30    sha1.process_bytes(str.c_str(), str.size());
31    sha1.get_digest(digest);
32    snprintf(buffer, sizeof(buffer), "%.8x%.8x%.8x%.8x%.8x",
33             digest[0], digest[1], digest[2], digest[3], digest[4]);
34    return std::string(buffer);
35}
36
37inline std::string signature(const std::vector<re::CC *> & ccs) {
38    if (LLVM_UNLIKELY(ccs.empty())) {
39        return "[]";
40    } else {
41        std::string tmp;
42        raw_string_ostream out(tmp);
43        char joiner = '[';
44        for (const auto & set : ccs) {
45            out << joiner;
46            set->print(out);
47            joiner = ',';
48        }
49        out << ']';
50        return out.str();
51    }
52}
53
54CharClassesSignature::CharClassesSignature(const std::vector<CC *> &ccs)
55: mSignature(signature(ccs)) {
56
57}
58
59
60CharClassesKernel::CharClassesKernel(const std::unique_ptr<kernel::KernelBuilder> & iBuilder, std::vector<CC *> && ccs)
61: CharClassesSignature(ccs)
62, PabloKernel(iBuilder,
63              "cc" + sha1sum(mSignature),
64              {Binding{iBuilder->getStreamSetTy(8), "basis"}},
65              {Binding{iBuilder->getStreamSetTy(ccs.size(), 1), "charclasses"}})
66, mCCs(std::move(ccs)) {
67
68}
69
70std::string CharClassesKernel::makeSignature(const std::unique_ptr<kernel::KernelBuilder> &) {
71    return mSignature;
72}
73
74void CharClassesKernel::generatePabloMethod() {
75    CC_Compiler ccc(this, getInput(0));
76    auto & pb = ccc.getBuilder();
77    unsigned n = mCCs.size();
78
79    NameMap nameMap;
80    std::vector<Name *> names;
81    for (unsigned i = 0; i < n; i++) {
82        Name * name = re::makeName("mpx_basis" + std::to_string(i), mCCs[i]);
83        nameMap.emplace(name, nullptr);
84        names.push_back(name);
85    }
86
87    UCD::UCDCompiler ucdCompiler(ccc);
88    if (LLVM_UNLIKELY(AlgorithmOptionIsSet(DisableIfHierarchy))) {
89        ucdCompiler.generateWithoutIfHierarchy(nameMap, pb);
90    } else {
91        ucdCompiler.generateWithDefaultIfHierarchy(nameMap, pb);
92    }
93
94    // The first UnicodeSet in the vector ccs represents the last bit of the character class basis bit streams.
95    std::reverse(names.begin(), names.end());
96    for (unsigned i = 0; i < names.size(); i++) {
97        auto t = nameMap.find(names[i]); 
98        if (t != nameMap.end()) {
99            PabloAST * const r = pb.createExtract(getOutput(0), pb.getInteger(i));
100            pb.createAssign(r, pb.createInFile(t->second));
101        } else {
102            llvm::report_fatal_error("Can't compile character classes.");
103        }
104    }
105}
106
107
Note: See TracBrowser for help on using the repository browser.