source: icGREP/icgrep-devel/icgrep/kernels/charclasses.cpp @ 5718

Last change on this file since 5718 was 5620, checked in by nmedfort, 22 months ago

Bug fixes for multigrep mode. Optional PabloKernel branch hit counter added. Minor optimizations.

File size: 3.5 KB
Line 
1/*
2 *  Copyright (c) 2017 International Characters.
3 *  This software is licensed to the public under the Open Software License 3.0.
4 */
5
6#include "charclasses.h"
7#include <boost/uuid/sha1.hpp>
8#include <re/re_toolchain.h>
9#include <pablo/pablo_toolchain.h>
10#include <kernels/kernel_builder.h>
11#include <pablo/builder.hpp>
12#include <pablo/pe_count.h>
13#include <UCD/resolve_properties.h>
14#include <UCD/ucd_compiler.hpp>
15#include <re/re_cc.h>
16#include <cc/cc_compiler.h>
17#include <re/re_name.h>
18#include <llvm/Support/raw_ostream.h>
19#include <boost/uuid/sha1.hpp>
20
21using NameMap = UCD::UCDCompiler::NameMap;
22
23using namespace cc;
24using namespace kernel;
25using namespace pablo;
26using namespace re;
27using namespace llvm;
28using namespace UCD;
29
30inline static std::string sha1sum(const std::string & str) {
31    char buffer[41];    // 40 hex-digits and the terminating null
32    uint32_t digest[5]; // 160 bits in total
33    boost::uuids::detail::sha1 sha1;
34    sha1.process_bytes(str.c_str(), str.size());
35    sha1.get_digest(digest);
36    snprintf(buffer, sizeof(buffer), "%.8x%.8x%.8x%.8x%.8x",
37             digest[0], digest[1], digest[2], digest[3], digest[4]);
38    return std::string(buffer);
39}
40
41inline std::string signature(const std::vector<UCD::UnicodeSet> & ccs) {
42    if (LLVM_UNLIKELY(ccs.empty())) {
43        return "[]";
44    } else {
45        std::string tmp;
46        raw_string_ostream out(tmp);
47        char joiner = '[';
48        for (const auto & set : ccs) {
49            out << joiner;
50            set.print(out);
51            joiner = ',';
52        }
53        out << ']';
54        return out.str();
55    }
56}
57
58CharClassesSignature::CharClassesSignature(const std::vector<UCD::UnicodeSet> & ccs)
59: mSignature(signature(ccs)) {
60
61}
62
63
64CharClassesKernel::CharClassesKernel(const std::unique_ptr<kernel::KernelBuilder> & iBuilder, std::vector<UnicodeSet> && ccs)
65: CharClassesSignature(ccs)
66, PabloKernel(iBuilder,
67              "cc" + sha1sum(mSignature),
68              {Binding{iBuilder->getStreamSetTy(8), "basis"}},
69              {Binding{iBuilder->getStreamSetTy(ccs.size(), 1), "charclasses"}})
70, mCCs(std::move(ccs)) {
71
72}
73
74std::string CharClassesKernel::makeSignature(const std::unique_ptr<kernel::KernelBuilder> &) {
75    return mSignature;
76}
77
78void CharClassesKernel::generatePabloMethod() {
79    CC_Compiler ccc(this, getInput(0));
80    auto & pb = ccc.getBuilder();
81    unsigned n = mCCs.size();
82
83    NameMap nameMap;
84    std::vector<Name *> names;
85    for (unsigned i = 0; i < n; i++) {
86        Name * name = re::makeName("cc" + std::to_string(i), makeCC(std::move(mCCs[i])));
87        nameMap.emplace(name, nullptr);
88        names.push_back(name);
89    }
90
91    UCD::UCDCompiler ucdCompiler(ccc);
92    if (LLVM_UNLIKELY(AlgorithmOptionIsSet(DisableIfHierarchy))) {
93        ucdCompiler.generateWithoutIfHierarchy(nameMap, pb);
94    } else {
95        ucdCompiler.generateWithDefaultIfHierarchy(nameMap, pb);
96    }
97
98    // The first UnicodeSet in the vector ccs represents the last bit of the character class basis bit streams.
99    std::reverse(names.begin(), names.end());
100    for (unsigned i = 0; i < names.size(); i++) {
101        auto t = nameMap.find(names[i]); 
102        if (t != nameMap.end()) {
103            PabloAST * const r = pb.createExtract(getOutput(0), pb.getInteger(i));
104            if (t->first->getType() == Name::Type::Byte) {
105                pb.createAssign(r, ccc.compileCC(dyn_cast<CC>(t->first->getDefinition())));
106            } else {
107                pb.createAssign(r, t->second);
108            }
109        } else {
110            throw std::runtime_error("Can't compile character classes.");
111        }
112    }
113}
114
115
Note: See TracBrowser for help on using the repository browser.