source: icGREP/icgrep-devel/icgrep/kernels/charclasses.cpp @ 6126

Last change on this file since 6126 was 6126, checked in by cameron, 11 months ago

Big-endian numbering option for CharClasses kernel

File size: 4.0 KB
RevLine 
[5564]1/*
2 *  Copyright (c) 2017 International Characters.
3 *  This software is licensed to the public under the Open Software License 3.0.
4 */
5
6#include "charclasses.h"
7#include <re/re_toolchain.h>
8#include <kernels/kernel_builder.h>
9#include <UCD/ucd_compiler.hpp>
10#include <cc/cc_compiler.h>
11#include <re/re_name.h>
[5620]12#include <boost/uuid/sha1.hpp>
[5786]13#include <pablo/builder.hpp>
[5805]14#include <llvm/Support/ErrorHandling.h>
[5750]15#include <llvm/Support/raw_ostream.h>
[5564]16
17using NameMap = UCD::UCDCompiler::NameMap;
18
19using namespace cc;
20using namespace kernel;
21using namespace pablo;
22using namespace re;
23using namespace llvm;
24using namespace UCD;
25
[5620]26inline static std::string sha1sum(const std::string & str) {
27    char buffer[41];    // 40 hex-digits and the terminating null
28    uint32_t digest[5]; // 160 bits in total
29    boost::uuids::detail::sha1 sha1;
30    sha1.process_bytes(str.c_str(), str.size());
31    sha1.get_digest(digest);
32    snprintf(buffer, sizeof(buffer), "%.8x%.8x%.8x%.8x%.8x",
33             digest[0], digest[1], digest[2], digest[3], digest[4]);
34    return std::string(buffer);
35}
36
[5748]37inline std::string signature(const std::vector<re::CC *> & ccs) {
[5620]38    if (LLVM_UNLIKELY(ccs.empty())) {
39        return "[]";
40    } else {
41        std::string tmp;
42        raw_string_ostream out(tmp);
43        char joiner = '[';
44        for (const auto & set : ccs) {
45            out << joiner;
[5748]46            set->print(out);
[5620]47            joiner = ',';
48        }
49        out << ']';
50        return out.str();
51    }
52}
53
[6126]54CharClassesSignature::CharClassesSignature(const std::vector<CC *> &ccs, bool useDirectCC, cc::BitNumbering bn)
55: mUseDirectCC(useDirectCC),
56  mSignature((useDirectCC ? "d" : "p") + numberingSuffix(bn) + signature(ccs)) {
[5620]57}
58
59
[6119]60CharClassesKernel::CharClassesKernel(const std::unique_ptr<kernel::KernelBuilder> & iBuilder, std::vector<CC *> && ccs, bool useDirectCC, cc::BitNumbering basisNumbering)
[6126]61: CharClassesSignature(ccs, useDirectCC, basisNumbering)
[5620]62, PabloKernel(iBuilder,
63              "cc" + sha1sum(mSignature),
[5854]64              {},
[5620]65              {Binding{iBuilder->getStreamSetTy(ccs.size(), 1), "charclasses"}})
[6119]66, mCCs(std::move(ccs)), mBasisSetNumbering(basisNumbering) {
[5854]67    if (useDirectCC) {
68        mStreamSetInputs.push_back({Binding{iBuilder->getStreamSetTy(1, 8), "byteData"}});
69    }
70    else {
71        mStreamSetInputs.push_back({Binding{iBuilder->getStreamSetTy(8), "basis"}});
72    }
[5564]73}
74
[5620]75std::string CharClassesKernel::makeSignature(const std::unique_ptr<kernel::KernelBuilder> &) {
76    return mSignature;
77}
78
[5564]79void CharClassesKernel::generatePabloMethod() {
[5842]80    PabloBuilder pb(getEntryScope());
[5861]81    std::unique_ptr<CC_Compiler> ccc;
[5854]82    if (mUseDirectCC) {
[5872]83        ccc = make_unique<cc::Direct_CC_Compiler>(getEntryScope(), pb.createExtract(getInput(0), pb.getInteger(0)));
[5861]84    } else {
[6119]85        ccc = make_unique<cc::Parabix_CC_Compiler>(getEntryScope(), getInputStreamSet("basis"), mBasisSetNumbering);
[5854]86    }
[5620]87    unsigned n = mCCs.size();
[5564]88
89    NameMap nameMap;
90    std::vector<Name *> names;
91    for (unsigned i = 0; i < n; i++) {
[5787]92        Name * name = re::makeName("mpx_basis" + std::to_string(i), mCCs[i]);
[5564]93        nameMap.emplace(name, nullptr);
94        names.push_back(name);
95    }
96
[5861]97    UCD::UCDCompiler ucdCompiler(*ccc.get());
[5564]98    if (LLVM_UNLIKELY(AlgorithmOptionIsSet(DisableIfHierarchy))) {
99        ucdCompiler.generateWithoutIfHierarchy(nameMap, pb);
100    } else {
101        ucdCompiler.generateWithDefaultIfHierarchy(nameMap, pb);
102    }
[6126]103    if (mBasisSetNumbering == cc::BitNumbering::BigEndian) {
104        // The first UnicodeSet in the vector ccs represents the last bit of the
105        // character class basis bit streams.
106        std::reverse(names.begin(), names.end());
107    }
[5564]108    for (unsigned i = 0; i < names.size(); i++) {
109        auto t = nameMap.find(names[i]); 
110        if (t != nameMap.end()) {
111            PabloAST * const r = pb.createExtract(getOutput(0), pb.getInteger(i));
[5805]112            pb.createAssign(r, pb.createInFile(t->second));
[5564]113        } else {
[5805]114            llvm::report_fatal_error("Can't compile character classes.");
[5564]115        }
116    }
117}
118
119
Note: See TracBrowser for help on using the repository browser.