source: icGREP/icgrep-devel/icgrep/kernels/charclasses.cpp @ 6119

Last change on this file since 6119 was 6119, checked in by xwa163, 10 months ago
  1. Add a BasisSetNumbering option to fix a multiplexing bug
  2. Use BigEndian BitNumbering for the lz4- and lzparabix-related pipelines
  3. Support multiplexing in LZ4BitStreamAio pipeline
File size: 3.8 KB
Line 
1/*
2 *  Copyright (c) 2017 International Characters.
3 *  This software is licensed to the public under the Open Software License 3.0.
4 */
5
6#include "charclasses.h"
7#include <re/re_toolchain.h>
8#include <kernels/kernel_builder.h>
9#include <UCD/ucd_compiler.hpp>
10#include <cc/cc_compiler.h>
11#include <re/re_name.h>
12#include <boost/uuid/sha1.hpp>
13#include <pablo/builder.hpp>
14#include <llvm/Support/ErrorHandling.h>
15#include <llvm/Support/raw_ostream.h>
16
17using NameMap = UCD::UCDCompiler::NameMap;
18
19using namespace cc;
20using namespace kernel;
21using namespace pablo;
22using namespace re;
23using namespace llvm;
24using namespace UCD;
25
26inline static std::string sha1sum(const std::string & str) {
27    char buffer[41];    // 40 hex-digits and the terminating null
28    uint32_t digest[5]; // 160 bits in total
29    boost::uuids::detail::sha1 sha1;
30    sha1.process_bytes(str.c_str(), str.size());
31    sha1.get_digest(digest);
32    snprintf(buffer, sizeof(buffer), "%.8x%.8x%.8x%.8x%.8x",
33             digest[0], digest[1], digest[2], digest[3], digest[4]);
34    return std::string(buffer);
35}
36
37inline std::string signature(const std::vector<re::CC *> & ccs) {
38    if (LLVM_UNLIKELY(ccs.empty())) {
39        return "[]";
40    } else {
41        std::string tmp;
42        raw_string_ostream out(tmp);
43        char joiner = '[';
44        for (const auto & set : ccs) {
45            out << joiner;
46            set->print(out);
47            joiner = ',';
48        }
49        out << ']';
50        return out.str();
51    }
52}
53
54CharClassesSignature::CharClassesSignature(const std::vector<CC *> &ccs, bool useDirectCC)
55: mUseDirectCC(useDirectCC), mSignature((useDirectCC ? "d" : "p") + signature(ccs)) {
56}
57
58
59CharClassesKernel::CharClassesKernel(const std::unique_ptr<kernel::KernelBuilder> & iBuilder, std::vector<CC *> && ccs, bool useDirectCC, cc::BitNumbering basisNumbering)
60: CharClassesSignature(ccs, useDirectCC)
61, PabloKernel(iBuilder,
62              "cc" + sha1sum(mSignature),
63              {},
64              {Binding{iBuilder->getStreamSetTy(ccs.size(), 1), "charclasses"}})
65, mCCs(std::move(ccs)), mBasisSetNumbering(basisNumbering) {
66    if (useDirectCC) {
67        mStreamSetInputs.push_back({Binding{iBuilder->getStreamSetTy(1, 8), "byteData"}});
68    }
69    else {
70        mStreamSetInputs.push_back({Binding{iBuilder->getStreamSetTy(8), "basis"}});
71    }
72}
73
74std::string CharClassesKernel::makeSignature(const std::unique_ptr<kernel::KernelBuilder> &) {
75    return mSignature;
76}
77
78void CharClassesKernel::generatePabloMethod() {
79    PabloBuilder pb(getEntryScope());
80    std::unique_ptr<CC_Compiler> ccc;
81    if (mUseDirectCC) {
82        ccc = make_unique<cc::Direct_CC_Compiler>(getEntryScope(), pb.createExtract(getInput(0), pb.getInteger(0)));
83    } else {
84        ccc = make_unique<cc::Parabix_CC_Compiler>(getEntryScope(), getInputStreamSet("basis"), mBasisSetNumbering);
85    }
86    unsigned n = mCCs.size();
87
88    NameMap nameMap;
89    std::vector<Name *> names;
90    for (unsigned i = 0; i < n; i++) {
91        Name * name = re::makeName("mpx_basis" + std::to_string(i), mCCs[i]);
92        nameMap.emplace(name, nullptr);
93        names.push_back(name);
94    }
95
96    UCD::UCDCompiler ucdCompiler(*ccc.get());
97    if (LLVM_UNLIKELY(AlgorithmOptionIsSet(DisableIfHierarchy))) {
98        ucdCompiler.generateWithoutIfHierarchy(nameMap, pb);
99    } else {
100        ucdCompiler.generateWithDefaultIfHierarchy(nameMap, pb);
101    }
102
103    // The first UnicodeSet in the vector ccs represents the last bit of the character class basis bit streams.
104    std::reverse(names.begin(), names.end());
105    for (unsigned i = 0; i < names.size(); i++) {
106        auto t = nameMap.find(names[i]); 
107        if (t != nameMap.end()) {
108            PabloAST * const r = pb.createExtract(getOutput(0), pb.getInteger(i));
109            pb.createAssign(r, pb.createInFile(t->second));
110        } else {
111            llvm::report_fatal_error("Can't compile character classes.");
112        }
113    }
114}
115
116
Note: See TracBrowser for help on using the repository browser.